From c768fb3b88eb278c52d0950709a7d4f376a89567 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Jan 2024 14:19:07 +0000 Subject: [PATCH 01/90] Fix to align xmaps to the right position in the cell --- src/ligand_neighbourhood_alignment/get_alignability.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index 40ef745a..351236b5 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -1,6 +1,7 @@ import gemmi import numpy as np from loguru import logger +from rich import print as rprint from ligand_neighbourhood_alignment.data import ( LigandNeighbourhood, @@ -189,7 +190,7 @@ def _update_ligand_neighbourhood_transforms( matches.append(ligand_2_id) if len(matches) == 0: - logger.warning(f"No Matches For {ligand_1_id}! No alignments will be generated!") + rprint(f"No Matches For {ligand_1_id}! 
No alignments will be generated!") # else: # connectivities.append(0) From c152d2ee3641075bb4c34ac7f8118c2ef007245d Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:16:19 +0000 Subject: [PATCH 02/90] Fix to align xmaps to the right position in the cell --- src/ligand_neighbourhood_alignment/cli.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index cd868951..ff695b52 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -572,7 +572,10 @@ def _get_structure_fragments(dataset: dt.Dataset, structure): def _get_dataset_neighbourhoods( - dataset: dt.Dataset, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly], max_radius: float = 7.0 + dataset: dt.Dataset, + xtalform: dt.XtalForm, + assemblies: dict[str, dt.Assembly], + max_radius: float = 9.0 ) -> dict[tuple[str, str, str], dt.Neighbourhood]: # Load the structure logger.debug(dataset.pdb) @@ -588,7 +591,11 @@ def _get_dataset_neighbourhoods( logger.debug(fragments) # Construct the neighbourhood search - ns: gemmi.NeighborSearch = gemmi.NeighborSearch(assembly[0], assembly.cell, max_radius).populate() + ns: gemmi.NeighborSearch = gemmi.NeighborSearch( + assembly[0], + assembly.cell, + max_radius, + ).populate() # For each bound fragment, identify the neighbourhood atoms and # partition them into model and artefact From 3e9fda6ddbb3985f9e043b56f23f43a1fdf89940 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Jan 2024 15:40:45 +0000 Subject: [PATCH 03/90] Fix to align xmaps to the right position in the cell --- src/ligand_neighbourhood_alignment/cli.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index ff695b52..6abc1ec4 100644 --- 
a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1142,6 +1142,11 @@ def _update( # for conformer_site_id, conformer_site in canonical_site.conformer_sites.items(): # for lid in conformer_site.ligand_ids: for dtag, dataset_alignment_info in fs_model.alignments.items(): + if dtag not in new_datasets: + print(f"Already processed dataset: {dtag}") + + continue + for chain, chain_alignment_info in dataset_alignment_info.items(): for residue, ligand_neighbourhood_output in chain_alignment_info.items(): for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): From 121b028a33bb89d00d300e5e77f4be670db1173e Mon Sep 17 00:00:00 2001 From: Tim Dudgeon Date: Thu, 25 Jan 2024 16:12:05 +0000 Subject: [PATCH 04/90] renamed map files --- src/ligand_neighbourhood_alignment/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index c7eaaef6..570ceec8 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -34,8 +34,8 @@ OUTPUT_JSON_PATH: str = "output.json" ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}.pdb" ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_artefacts.pdb" -ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_2FoFc.ccp4" -ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_FoFc.ccp4" +ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_sigmaa.ccp4" +ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_diff.ccp4" ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_event.ccp4" From 1d2e32337fe74f1051122cc6560d89ba225d7a0e Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:00:16 +0000 Subject: [PATCH 05/90] Test splitting 
alignment graph --- src/ligand_neighbourhood_alignment/cli.py | 28 ++++++++----------- .../get_alignability.py | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 6abc1ec4..f695c005 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -643,6 +643,7 @@ def _save_ligand_neighbourhood_transforms(fs_model, ligand_neighbourhood_transfo def _update_graph(alignability_graph, ligand_neighbourhood_transforms): + nodes = alignability_graph.nodes edges = alignability_graph.edges for to_ligand_id, from_ligand_id in ligand_neighbourhood_transforms: @@ -1009,18 +1010,13 @@ def _update( # for dtag, dataset in new_datasets.items(): for lid, neighbourhood in ligand_neighbourhoods.items(): - if lid[0] in new_datasets: - _update_ligand_neighbourhood_transforms( - ligand_neighbourhood_transforms, - lid, - ligand_neighbourhoods, - structures, - ) - else: - logger.info(f'Skipping ligand: {lid}: Not in a new dataset!') - # alignments, transforms = _get_alignments() - # for target_lid, transform in transforms.items(): - # ligand_neighbourhood_transforms[(lid, target_lid)] = transform + _update_ligand_neighbourhood_transforms( + ligand_neighbourhood_transforms, + lid, + ligand_neighbourhoods, + structures, + ) + logger.info(f"Now have {len(ligand_neighbourhood_transforms)} alignments between neighbourhoods") print(ligand_neighbourhood_transforms) _save_ligand_neighbourhood_transforms(fs_model, ligand_neighbourhood_transforms) @@ -1046,6 +1042,9 @@ def _update( logger.info(f"Previously had {len(conformer_sites)} conformer sites") for connected_component in connected_components: + # Update new datasets to indicate everything sharing a connected component + # + # Match new component to old ones by membership, and expand old ones if available otherwise create new one _update_conformer_sites(conformer_sites, connected_component, 
ligand_neighbourhoods, structures) logger.info(f"Now have {len(conformer_sites)} conformer sites") @@ -1142,11 +1141,6 @@ def _update( # for conformer_site_id, conformer_site in canonical_site.conformer_sites.items(): # for lid in conformer_site.ligand_ids: for dtag, dataset_alignment_info in fs_model.alignments.items(): - if dtag not in new_datasets: - print(f"Already processed dataset: {dtag}") - - continue - for chain, chain_alignment_info in dataset_alignment_info.items(): for residue, ligand_neighbourhood_output in chain_alignment_info.items(): for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index 351236b5..a5089fe0 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 5, + min_alignable_atoms: int = 7, max_alignable_rmsd: float = 2.0, ): From 8cf735c2b8eba08471dd948fc540c7e2dda7b5c1 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:14:24 +0000 Subject: [PATCH 06/90] Test splitting alignment graph --- src/ligand_neighbourhood_alignment/get_alignability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index a5089fe0..b2fc3b15 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 7, + min_alignable_atoms: 
int = 10, max_alignable_rmsd: float = 2.0, ): From 7222dd641522f3f9e2ba28aeeb3c0cb70c5dbf44 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 12:59:59 +0000 Subject: [PATCH 07/90] Test splitting alignment graph --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index f695c005..89a568f5 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1227,7 +1227,7 @@ def _update( moving_ligand_id = (dtag, chain, residue) reference_ligand_id = conformer_site.reference_ligand_id - print(ligand_neighbourhoods) + # print(ligand_neighbourhoods) xmap_path = datasets[dtag].ligand_binding_events[(dtag, chain, residue)].xmap From f2eb368fe8be63665bb226d0b1333e329f9a661d Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 13:22:13 +0000 Subject: [PATCH 08/90] Test splitting alignment graph --- src/ligand_neighbourhood_alignment/cli.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 89a568f5..66833b1f 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -642,15 +642,24 @@ def _save_ligand_neighbourhood_transforms(fs_model, ligand_neighbourhood_transfo yaml.safe_dump(dic, f) -def _update_graph(alignability_graph, ligand_neighbourhood_transforms): +def _update_graph( + alignability_graph, + ligand_neighbourhoods, + ligand_neighbourhood_transforms, +): nodes = alignability_graph.nodes edges = alignability_graph.edges + + for ligand_id in ligand_neighbourhoods: + if ligand_id not in nodes: + alignability_graph.add_node(ligand_id) + for to_ligand_id, from_ligand_id in 
ligand_neighbourhood_transforms: - if to_ligand_id not in nodes: - alignability_graph.add_node(to_ligand_id) - if from_ligand_id not in nodes: - alignability_graph.add_node(from_ligand_id) + # if to_ligand_id not in nodes: + # alignability_graph.add_node(to_ligand_id) + # if from_ligand_id not in nodes: + # alignability_graph.add_node(from_ligand_id) if (to_ligand_id, from_ligand_id) not in edges: alignability_graph.add_edge(to_ligand_id, from_ligand_id) @@ -1027,6 +1036,7 @@ def _update( logger.info(f"Previously had {len(alignability_graph.edges)} edges") _update_graph( alignability_graph, + ligand_neighbourhoods, ligand_neighbourhood_transforms, ) logger.info(f"Now have {len(alignability_graph.nodes)} nodes") From d2dfe8884d40c1deb1873dec79523fe5af816c85 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 13:58:59 +0000 Subject: [PATCH 09/90] Test splitting alignment graph --- src/ligand_neighbourhood_alignment/get_alignability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index b2fc3b15..351236b5 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 10, + min_alignable_atoms: int = 5, max_alignable_rmsd: float = 2.0, ): From b30989e5db9888b49550c801ce753dbc703de7c5 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 26 Jan 2024 13:59:35 +0000 Subject: [PATCH 10/90] Test splitting alignment graph --- src/ligand_neighbourhood_alignment/get_alignability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py 
b/src/ligand_neighbourhood_alignment/get_alignability.py index 351236b5..902484ba 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 5, + min_alignable_atoms: int = 5, # 10 splits A71, but some things almost identical end up in different clusters max_alignable_rmsd: float = 2.0, ): From 69c7a47f80d1ec3ceffc2d863e9219cd55ac4114 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:23:17 +0000 Subject: [PATCH 11/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 105 ++++++++++++++++-- .../constants.py | 2 + src/ligand_neighbourhood_alignment/dt.py | 6 + 3 files changed, 106 insertions(+), 7 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 66833b1f..067b9f81 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -683,13 +683,74 @@ def _save_graph(fs_model, alignability_graph): ) -def _get_connected_components(alignability_graph): - cliques = list(nx.connected_components(alignability_graph)) - return cliques +# def _get_connected_components(alignability_graph): +# cliques = list(nx.connected_components(alignability_graph)) +# return cliques +def _get_connected_components( + alignability_graph, + clusters, + max_path_length=2 +): + """ + Construct neighbourhoods around the most connected neighbourhoods by some max path length, + + + """ + + # Get the graph of short paths + path = dict(nx.all_pairs_shortest_path(alignability_graph)) + path_lengths = {(source, target): len(path[source][target]) for source in path for target in path[source]} + H = nx.Graph() + for node in path: + 
H.add_node(node) + for source, target in path_lengths: + if path_lengths[(source, target)] <= max_path_length: + H.add_edge( + source, + target + ) + + + + # + degrees = dict(nx.degree(H)) + + # Replay cluster cores + used = sum([cluster for cluster in clusters.values()]) + for x in clusters: + for target in H.nodes: + if target in used: + continue + if (x, target) not in path_lengths: + continue + if path_lengths[(x, target)] <= 2: + used.append(target) + clusters[x].append(target) + + # Now go through any new ligands that are not yet connected, constructing clusters for them + for x in sorted(degrees, key=lambda _x: degrees[_x], reverse=True): + if x in used: + continue + clusters[x] = [] + print(f'f{x} : {degrees[x]}') + + # for n in G.neighbors(x): + # used.append(n) + for target in H.nodes: + if target in used: + continue + if (x, target) not in path_lengths: + continue + if path_lengths[(x, target)] <= 2: + used.append(target) + clusters[x].append(target) + + return clusters def _update_conformer_sites( conformer_sites: dict[str, dt.ConformerSite], + connected_component_id: tuple[str, str, str], connected_component: list[tuple[str, str, str]], neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], structures @@ -715,11 +776,18 @@ def _update_conformer_sites( conformer_site = dt.ConformerSite( [x for x in set(residues)], connected_component, - [x for x in connected_component][0] + # [x for x in connected_component][0] + connected_component_id ) conformer_site_id = "+".join(conformer_site.reference_ligand_id) conformer_sites[conformer_site_id] = conformer_site +def _save_connected_components(fs_model, connected_components): + with open(fs_model.connected_components, 'w') as f: + yaml.safe_dump(connected_components, f) + + + def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): with open(fs_model.conformer_sites, 'w') as f: @@ -976,6 +1044,7 @@ def _update( dataset_assignments: dict[str, str], 
ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], alignability_graph, + connected_components, ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], conformer_sites: dict[str, dt.ConformerSite], conformer_site_transforms: dict[tuple[str, str], dt.Transform], @@ -1047,16 +1116,23 @@ def _update( # Update conformer sites logger.info(f"Updating conformer sites...") - connected_components = _get_connected_components(alignability_graph) + connected_components = _get_connected_components(alignability_graph, connected_components) + _save_connected_components(fs_model, connected_components) logger.info(f"Got {len(connected_components)} connected components") logger.info(f"Previously had {len(conformer_sites)} conformer sites") - for connected_component in connected_components: + for connected_component_id, connected_component in connected_components.items(): # Update new datasets to indicate everything sharing a connected component # # Match new component to old ones by membership, and expand old ones if available otherwise create new one - _update_conformer_sites(conformer_sites, connected_component, ligand_neighbourhoods, structures) + _update_conformer_sites( + conformer_sites, + connected_component_id, + connected_component, + ligand_neighbourhoods, + structures, + ) logger.info(f"Now have {len(conformer_sites)} conformer sites") for conformer_site_id, conformer_site in conformer_sites.items(): print(conformer_site_id) @@ -1437,6 +1513,15 @@ def _load_alignability_graph(alignability_graph): else: return nx.Graph() +def _load_connected_components(connected_components_yaml): + connected_components = {} + if connected_components_yaml.exists(): + + with open(connected_components_yaml, 'r') as f: + connected_components = yaml.safe_load(f) + + return connected_components + def _load_ligand_neighbourhood_transforms(ligand_neighbourhood_transforms_yaml): ligand_neighbourhood_transforms = {} @@ -1647,6 +1732,11 
@@ def update(self, options_json: str): else: alignability_graph = _load_alignability_graph(fs_model.alignability_graph) + if source_fs_model: + connected_components = _load_connected_components(source_fs_model.connected_components) + else: + connected_components = _load_connected_components(fs_model.connected_components) + # logger.info(f"Getting lighand neighbourhood transforms...") if source_fs_model: @@ -1717,6 +1807,7 @@ def update(self, options_json: str): dataset_assignments, ligand_neighbourhoods, alignability_graph, + connected_components, ligand_neighbourhood_transforms, conformer_sites, conformer_site_transforms, diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index c7eaaef6..5a0aa4ba 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -1,4 +1,5 @@ ALIGNABILITY_GRAPH_FILE_NAME: str = "alignability.gml" +CONNECTED_COMPONENTS_FILE_NAME: str = "connected_components.json" TRANSFORMS_FILE_NAME: str = "transforms.json" NEIGHBOURHOODS_FILE_NAME: str = "neighbourhoods.json" DATA_JSON_PATH: str = "data.json" @@ -44,6 +45,7 @@ XTALFORMS_YAML_FILE_NAME = "xtalforms.yaml" ASSIGNED_XTALFORMS_YAML_FILE_NAME = "assigned_xtalforms.yaml" NEIGHBOURHOODS_YAML_FILE_NAME = "neighbourhoods.yaml" +CONNECTED_COMPONENTS_YAML_NAME = "connected_components.yaml" TRANSFORMS_YAML_FILE_NAME = "neighbourhood_transforms.yaml" CONFORMER_SITE_YAML_FILE = "conformer_sites.yaml" CONFORMER_SITES_TRANSFORMS_YAML_FILE_NAME = "conformer_site_transforms.yaml" diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index dc5ef9d1..f509371e 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -64,6 +64,7 @@ def __init__( dataset_assignments, ligand_neighbourhoods, alignability_graph, + connected_components, ligand_neighbourhood_transforms, conformer_sites, 
conformer_site_transforms, @@ -81,6 +82,7 @@ def __init__( self.dataset_assignments = dataset_assignments self.ligand_neighbourhoods = ligand_neighbourhoods self.alignability_graph = alignability_graph + self.connected_components = connected_components self.ligand_neighbourhood_transforms = ligand_neighbourhood_transforms self.conformer_sites = conformer_sites self.conformer_site_transforms = conformer_site_transforms @@ -160,6 +162,7 @@ def from_dir( dataset_assignments = source_dir / constants.ASSIGNED_XTALFORMS_YAML_FILE_NAME ligand_neighbourhoods = source_dir / constants.NEIGHBOURHOODS_YAML_FILE_NAME alignability_graph = source_dir / constants.ALIGNABILITY_GRAPH_FILE_NAME + connected_components = source_dir / constants.CONNECTED_COMPONENTS_YAML_NAME ligand_neighbourhood_transforms = source_dir / constants.TRANSFORMS_YAML_FILE_NAME conformer_sites = source_dir / constants.CONFORMER_SITE_YAML_FILE conformer_site_transforms = source_dir / constants.CONFORMER_SITES_TRANSFORMS_YAML_FILE_NAME @@ -178,6 +181,7 @@ def from_dir( dataset_assignments, ligand_neighbourhoods, alignability_graph, + connected_components, ligand_neighbourhood_transforms, conformer_sites, conformer_site_transforms, @@ -228,6 +232,7 @@ def from_dict(dic): dataset_assignments=Path(dic['dataset_assignments']), ligand_neighbourhoods=Path(dic['ligand_neighbourhoods']), alignability_graph=Path(dic['alignability_graph']), + connected_components=Path(dic['connected_components']), ligand_neighbourhood_transforms=Path(dic['ligand_neighbourhood_transforms']), conformer_sites=Path(dic['conformer_sites']), conformer_site_transforms=Path(dic['conformer_site_transforms']), @@ -268,6 +273,7 @@ def to_dict(self, ): 'dataset_assignments': str(self.dataset_assignments), 'ligand_neighbourhoods': str(self.ligand_neighbourhoods), 'alignability_graph': str(self.alignability_graph), + 'connected_components': str(self.connected_components), 'ligand_neighbourhood_transforms': str(self.ligand_neighbourhood_transforms), 
'conformer_sites': str(self.conformer_sites), 'conformer_site_transforms': str(self.conformer_site_transforms), From b8371f1862d47612966a7bde667e76c296344cd1 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:36:24 +0000 Subject: [PATCH 12/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 067b9f81..9152016f 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -717,7 +717,7 @@ def _get_connected_components( degrees = dict(nx.degree(H)) # Replay cluster cores - used = sum([cluster for cluster in clusters.values()]) + used = [member for cluster in clusters for member in cluster] for x in clusters: for target in H.nodes: if target in used: From 0310591e7b76cee14f9cf91e9051399294411e9b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:49:50 +0000 Subject: [PATCH 13/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 9152016f..871cc062 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1066,6 +1066,7 @@ def _update( structures, ) _save_assignments(fs_model, dataset_assignments) + logger.info(f"Assigned {len(dataset_assignments)} xtalform assignments to datasets!") # Get neighbourhoods logger.info(f"Updating neighbourhoods") @@ -1074,6 +1075,7 @@ def _update( neighborhoods = _get_neighbourhoods(dataset, xtalform, assemblies) for lid, neighbourhood in neighborhoods.items(): ligand_neighbourhoods[lid] = neighbourhood + 
logger.info(f"Found {len(neighborhoods)} ligand neighbourhoods!") _save_neighbourhoods(fs_model, ligand_neighbourhoods) for nid, neighbourhood in ligand_neighbourhoods.items(): From a6963b390bf79250ab5d6011043e8f4a97f5ae73 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:52:54 +0000 Subject: [PATCH 14/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 871cc062..bfb2d3f9 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1075,7 +1075,7 @@ def _update( neighborhoods = _get_neighbourhoods(dataset, xtalform, assemblies) for lid, neighbourhood in neighborhoods.items(): ligand_neighbourhoods[lid] = neighbourhood - logger.info(f"Found {len(neighborhoods)} ligand neighbourhoods!") + logger.info(f"Found {len(ligand_neighbourhoods)} ligand neighbourhoods!") _save_neighbourhoods(fs_model, ligand_neighbourhoods) for nid, neighbourhood in ligand_neighbourhoods.items(): From 2c0838960baeecb0fcfe534759b09bb1f5e61139 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:58:09 +0000 Subject: [PATCH 15/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index bfb2d3f9..d978d099 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1073,6 +1073,7 @@ def _update( for dtag, dataset in new_datasets.items(): xtalform = xtalforms[dataset_assignments[dtag]] neighborhoods = _get_neighbourhoods(dataset, xtalform, assemblies) + logger.info(f"Dataset {dtag} has 
{len(neighborhoods)} ligand neighbourhoods") for lid, neighbourhood in neighborhoods.items(): ligand_neighbourhoods[lid] = neighbourhood logger.info(f"Found {len(ligand_neighbourhoods)} ligand neighbourhoods!") From b8be5df9f448e1370fbeeac1fea8046bd00c0722 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:09:42 +0000 Subject: [PATCH 16/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index d978d099..c60f6c85 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -550,7 +550,7 @@ def _get_structure_fragments(dataset: dt.Dataset, structure): for model in structure: for chain in model: source_chain, biomol_chain, transform = chain.name.split("~") - for residue in chain.get_ligands(): + for residue in chain: #.get_ligands(): for lbe in dataset.ligand_binding_events: # if ( # (residue.name == "LIG") From 6b94169ca16ce976186232d1e2784f7d792c3b5f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:10:12 +0000 Subject: [PATCH 17/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index c60f6c85..b2ea6eac 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -561,7 +561,7 @@ def _get_structure_fragments(dataset: dt.Dataset, structure): # ligand_id = (dataset.dtag, str(chain.name), str(lbe.residue),) # fragments[ligand_id] = residue # lig_number = lig_number + 1 - if (lbe[2] == str(residue.seqid.num)) & (lbe[1] == str(source_chain)) & 
(transform == "x,y,z"): + if (str(lbe[2]) == str(residue.seqid.num)) & (str(lbe[1]) == str(source_chain)) & (transform == "x,y,z"): ligand_id = (dataset.dtag, str(lbe[1]), str(lbe[2]),) fragments[ligand_id] = residue From 69322a55e25c2e83e31d50b75c54e333c418c306 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:56:58 +0000 Subject: [PATCH 18/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index b2ea6eac..17dc2187 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -801,6 +801,7 @@ def _update_canonical_sites( canonical_sites: dict[str, dt.CanonicalSite], conformer_site: dt.ConformerSite, conformer_site_id, + min_shared_residues=6 ): if len(canonical_sites) != 0: global_reference_dtag = [x for x in canonical_sites.values()][0].global_reference_dtag @@ -815,7 +816,7 @@ def _update_canonical_sites( canonical_site_residues = [(residue[1], residue[2]) for residue in canonical_site.residues] if conformer_site_id not in canonical_site.conformer_site_ids: v = set(canonical_site_residues).intersection(set(conformer_site_residues)) - if len(v) >= 3: + if len(v) >= min_shared_residues: # Matched! 
matched = True canonical_site.conformer_site_ids.append(conformer_site_id) From 6bb814bb89727542da249ecd17ab8173bb9f97cb Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 13:58:27 +0000 Subject: [PATCH 19/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 17dc2187..1743d680 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -816,7 +816,7 @@ def _update_canonical_sites( canonical_site_residues = [(residue[1], residue[2]) for residue in canonical_site.residues] if conformer_site_id not in canonical_site.conformer_site_ids: v = set(canonical_site_residues).intersection(set(conformer_site_residues)) - if len(v) >= min_shared_residues: + if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): # Matched! 
matched = True canonical_site.conformer_site_ids.append(conformer_site_id) From d33679c245a8a567b9f642a27c657a856321883d Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 14:26:52 +0000 Subject: [PATCH 20/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 1743d680..1f1dbdc6 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -782,13 +782,17 @@ def _update_conformer_sites( conformer_site_id = "+".join(conformer_site.reference_ligand_id) conformer_sites[conformer_site_id] = conformer_site -def _save_connected_components(fs_model, connected_components): +def _save_connected_components(fs_model, connected_components): with open(fs_model.connected_components, 'w') as f: + dic = {} + for connected_component_reference, connected_component in connected_components.items(): + dic["+".join(connected_component_reference)] = [ + "+".join(member) + for member + in connected_component + ] yaml.safe_dump(connected_components, f) - - - def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): with open(fs_model.conformer_sites, 'w') as f: dic = {} From 800fd800b255ae4c839d13b5cff791322100dc02 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 14:27:00 +0000 Subject: [PATCH 21/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 1f1dbdc6..21c4d883 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ 
b/src/ligand_neighbourhood_alignment/cli.py @@ -791,7 +791,7 @@ def _save_connected_components(fs_model, connected_components): for member in connected_component ] - yaml.safe_dump(connected_components, f) + yaml.safe_dump(dic, f) def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): with open(fs_model.conformer_sites, 'w') as f: From fdfe986dc017adc73e2de4f42213838abe6777bf Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:10:48 +0000 Subject: [PATCH 22/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 21c4d883..5c8421a8 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1523,10 +1523,18 @@ def _load_alignability_graph(alignability_graph): def _load_connected_components(connected_components_yaml): connected_components = {} + if connected_components_yaml.exists(): with open(connected_components_yaml, 'r') as f: - connected_components = yaml.safe_load(f) + dic = yaml.safe_load(f) + + if dic: + for ligand_id, neighbourhood_info in dic.items(): + dtag, chain, residue = ligand_id.split("+") + neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) + connected_components[(dtag, chain, residue)] = neighbourhood + return connected_components From acd46d7f46c0b5515edbb91a2966dc1a10a9044c Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:17:00 +0000 Subject: [PATCH 23/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py 
index 5c8421a8..bdd4ceab 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -791,7 +791,7 @@ def _save_connected_components(fs_model, connected_components): for member in connected_component ] - yaml.safe_dump(dic, f) + yaml.safe_dump(dic, f, sort_keys=False) def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): with open(fs_model.conformer_sites, 'w') as f: From c6e8fe7280cc2cb0ba9b80d5e3eedbff8251664f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:18:14 +0000 Subject: [PATCH 24/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index bdd4ceab..73271970 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -820,8 +820,10 @@ def _update_canonical_sites( canonical_site_residues = [(residue[1], residue[2]) for residue in canonical_site.residues] if conformer_site_id not in canonical_site.conformer_site_ids: v = set(canonical_site_residues).intersection(set(conformer_site_residues)) - if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): + # if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): + if len(v) >= 0.5*len(canonical_site_residues): # Matched! 
+ matched = True canonical_site.conformer_site_ids.append(conformer_site_id) @@ -1298,7 +1300,13 @@ def _update( # conformer_site = conformer_sites[conformer_site_id] # for lid in conformer_site.members: # _update_aligned_xmaps() - reference_xmap = read_xmap_from_mtz(datasets[[x for x in canonical_sites.values()][0].global_reference_dtag].mtz) + reference_xmap = read_xmap_from_mtz( + datasets[ + [ + x for x in canonical_sites.values() + ][0].global_reference_dtag + ].mtz + ) logger.info(f"Outputting xmaps...") for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): From edf601ce74d7bdf670ba7a62cd1b7b5a27abe597 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 13:18:18 +0000 Subject: [PATCH 25/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 73271970..897fc808 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -823,7 +823,6 @@ def _update_canonical_sites( # if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): if len(v) >= 0.5*len(canonical_site_residues): # Matched! 
- matched = True canonical_site.conformer_site_ids.append(conformer_site_id) From ae9273fccb0332839ebcac4b7dcaefbe5a3f54da Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 14:12:18 +0000 Subject: [PATCH 26/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 897fc808..e20ba1b2 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -821,7 +821,7 @@ def _update_canonical_sites( if conformer_site_id not in canonical_site.conformer_site_ids: v = set(canonical_site_residues).intersection(set(conformer_site_residues)) # if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): - if len(v) >= 0.5*len(canonical_site_residues): + if len(v) >= 0.75*len(canonical_site_residues): # Matched! 
matched = True canonical_site.conformer_site_ids.append(conformer_site_id) From 3905fc2bec5bfdaca35e1f52662ee13e85478600 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:33:03 +0000 Subject: [PATCH 27/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index e20ba1b2..08a98778 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1539,8 +1539,12 @@ def _load_connected_components(connected_components_yaml): if dic: for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue = ligand_id.split("+") - neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) - connected_components[(dtag, chain, residue)] = neighbourhood + # neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) + connected_components[(dtag, chain, residue)] = [ + _ligand_id.split("+") + for _ligand_id + in neighbourhood_info + ] return connected_components From 8494976fc0b77f6e461ff54a71e0115d67c2c54b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:40:40 +0000 Subject: [PATCH 28/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 08a98778..228aca0e 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1540,11 +1540,11 @@ def _load_connected_components(connected_components_yaml): for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue = ligand_id.split("+") # neighbourhood = 
dt.Neighbourhood.from_dict(neighbourhood_info) - connected_components[(dtag, chain, residue)] = [ + connected_components[(dtag, chain, residue)] = ( _ligand_id.split("+") for _ligand_id in neighbourhood_info - ] + ) return connected_components From b8c8c810a50d0b9271f2b5a408de94c118d14f73 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:43:10 +0000 Subject: [PATCH 29/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 228aca0e..e0af8a72 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1540,7 +1540,7 @@ def _load_connected_components(connected_components_yaml): for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue = ligand_id.split("+") # neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) - connected_components[(dtag, chain, residue)] = ( + connected_components[(dtag, chain, residue)] = tuple( _ligand_id.split("+") for _ligand_id in neighbourhood_info From 4348b9dee7eb9a0d86d34d8b8a203d8bdd95ff78 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 31 Jan 2024 20:43:39 +0000 Subject: [PATCH 30/90] Test new graph cluster conformer site formation approach --- src/ligand_neighbourhood_alignment/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index e0af8a72..10f58ec1 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1540,11 +1540,11 @@ def _load_connected_components(connected_components_yaml): for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue = ligand_id.split("+") # 
neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) - connected_components[(dtag, chain, residue)] = tuple( - _ligand_id.split("+") + connected_components[(dtag, chain, residue)] = [ + tuple([x for x in _ligand_id.split("+")]) for _ligand_id in neighbourhood_info - ) + ] return connected_components From 776c2322657d4487e16ccb62bb2d67414068a1aa Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:14:22 +0000 Subject: [PATCH 31/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 43 ++++++++++++------- .../get_alignability.py | 6 +-- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 10f58ec1..7117c2e0 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -544,8 +544,8 @@ def _generate_assembly( return full_st -def _get_structure_fragments(dataset: dt.Dataset, structure): - fragments: dict[tuple[str, str, str], gemmi.Residue] = {} +def _get_structure_fragments(dataset: dt.Dataset, structure, version): + fragments: dict[tuple[str, str, str, str], gemmi.Residue] = {} # lig_number: int = 0 for model in structure: for chain in model: @@ -562,7 +562,7 @@ def _get_structure_fragments(dataset: dt.Dataset, structure): # fragments[ligand_id] = residue # lig_number = lig_number + 1 if (str(lbe[2]) == str(residue.seqid.num)) & (str(lbe[1]) == str(source_chain)) & (transform == "x,y,z"): - ligand_id = (dataset.dtag, str(lbe[1]), str(lbe[2]),) + ligand_id = (dataset.dtag, str(lbe[1]), str(lbe[2]), str(version)) fragments[ligand_id] = residue return fragments @@ -575,8 +575,9 @@ def _get_dataset_neighbourhoods( dataset: dt.Dataset, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly], + version, max_radius: float = 9.0 -) -> dict[tuple[str, str, str], dt.Neighbourhood]: +) -> dict[tuple[str, str, str, str], 
dt.Neighbourhood]: # Load the structure logger.debug(dataset.pdb) structure = gemmi.read_structure(dataset.pdb) @@ -586,7 +587,11 @@ def _get_dataset_neighbourhoods( assembly = _generate_assembly(xtalform, structure, assemblies) # Get the bound fragments - fragments: dict[tuple[str, str, str], gemmi.Residue] = _get_structure_fragments(dataset, assembly) + fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments( + dataset, + assembly, + version + ) logger.debug(f"Get {len(fragments)} fragment neighbourhoods") logger.debug(fragments) @@ -599,7 +604,7 @@ def _get_dataset_neighbourhoods( # For each bound fragment, identify the neighbourhood atoms and # partition them into model and artefact - fragment_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood] = {} + fragment_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood] = {} for ligand_id, fragment in fragments.items(): fragment_neighbourhoods[ligand_id] = _get_ligand_neighbourhood( assembly, @@ -611,16 +616,21 @@ def _get_dataset_neighbourhoods( return fragment_neighbourhoods -def _get_neighbourhoods(dataset: dt.Dataset, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly]): - dataset_ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood] = _get_dataset_neighbourhoods( - dataset, xtalform, assemblies +def _get_neighbourhoods( + dataset: dt.Dataset, + xtalform: dt.XtalForm, + assemblies: dict[str, dt.Assembly], + version, +): + dataset_ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood] = _get_dataset_neighbourhoods( + dataset, xtalform, assemblies, version ) return dataset_ligand_neighbourhoods def _save_neighbourhoods( fs_model: dt.FSModel, - ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], ): with open(fs_model.ligand_neighbourhoods, 'w') as f: dic = {} @@ -1048,17 +1058,20 @@ def _update( assemblies: dict[str, dt.Assembly], 
xtalforms: dict[str, dt.XtalForm], dataset_assignments: dict[str, str], - ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + # alignment_landmarks: dict[tuple[str,str,str,int], dict[tuple[str, str, str], dt.Atom]], alignability_graph, connected_components, - ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], + ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], conformer_sites: dict[str, dt.ConformerSite], conformer_site_transforms: dict[tuple[str, str], dt.Transform], canonical_sites: dict[str, dt.CanonicalSite], # canonical_site_transforms: dict[str, dt.Transform], xtalform_sites: dict[str, dt.XtalFormSite], - reference_structure_transforms: dict[tuple[str, str], dt.Transform] + reference_structure_transforms: dict[tuple[str, str], dt.Transform], + version ): + logger.info(f"Version is: {version}") # Get the structures structures: dict = _get_structures(datasets) @@ -1078,7 +1091,7 @@ def _update( logger.info(f"Updating neighbourhoods") for dtag, dataset in new_datasets.items(): xtalform = xtalforms[dataset_assignments[dtag]] - neighborhoods = _get_neighbourhoods(dataset, xtalform, assemblies) + neighborhoods = _get_neighbourhoods(dataset, xtalform, assemblies, version) logger.info(f"Dataset {dtag} has {len(neighborhoods)} ligand neighbourhoods") for lid, neighbourhood in neighborhoods.items(): ligand_neighbourhoods[lid] = neighbourhood @@ -1528,6 +1541,7 @@ def _load_alignability_graph(alignability_graph): else: return nx.Graph() + def _load_connected_components(connected_components_yaml): connected_components = {} @@ -1546,7 +1560,6 @@ def _load_connected_components(connected_components_yaml): in neighbourhood_info ] - return connected_components diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py 
b/src/ligand_neighbourhood_alignment/get_alignability.py index 902484ba..c21de5f7 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -158,9 +158,9 @@ def _match_cas( return False, None, None def _update_ligand_neighbourhood_transforms( - ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], - lid: tuple[str, str, str], - ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], + ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], + lid: tuple[str, str, str, str], + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], structures, ): # connectivity = [] From 187e92ddafa035a9ffd63f87be25f464170253b4 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:22:41 +0000 Subject: [PATCH 32/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 7117c2e0..296a9f9b 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -958,7 +958,7 @@ def _update_fs_model( for conformer_site_id in canonical_site.conformer_site_ids: conformer_site = conformer_sites[conformer_site_id] for member in conformer_site.members: - dtag, chain, residue = member + dtag, chain, residue, version = member if dtag not in alignments: alignments[dtag] = {} if chain not in alignments[dtag]: From 5886d7c787bcb8477433d9628f6676fa5db474b6 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:23:23 +0000 Subject: [PATCH 33/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 1 + 1 file 
changed, 1 insertion(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 296a9f9b..2a4d88ff 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -951,6 +951,7 @@ def _update_fs_model( conformer_sites: dict[str, dt.ConformerSite], reference_datasets: dict[str, dt.Dataset] ): + # Iterate over canonical sites and their members, checking if they already have an output record and # if not creating one alignments = fs_model.alignments From 4165f35507f9c68e20e827ecb462bba8e84f6f5a Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:26:01 +0000 Subject: [PATCH 34/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 2a4d88ff..9e8d39db 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1513,7 +1513,7 @@ def _load_ligand_neighbourhoods(ligand_neighbourhoods_yaml): if dic: for ligand_id, neighbourhood_info in dic.items(): - dtag, chain, residue = ligand_id.split("/") + dtag, chain, residue, version = ligand_id.split("/") neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) ligand_neighbourhoods[(dtag, chain, residue)] = neighbourhood From ccc67a640d3e76aff8016db6abb0e29a4f3fc3ec Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:26:46 +0000 Subject: [PATCH 35/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 9e8d39db..cb70cb3c 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ 
b/src/ligand_neighbourhood_alignment/cli.py @@ -1553,7 +1553,7 @@ def _load_connected_components(connected_components_yaml): if dic: for ligand_id, neighbourhood_info in dic.items(): - dtag, chain, residue = ligand_id.split("+") + dtag, chain, residue, version = ligand_id.split("+") # neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) connected_components[(dtag, chain, residue)] = [ tuple([x for x in _ligand_id.split("+")]) From 9682dd44645404ed94955658815c4fa4ddf71565 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:27:35 +0000 Subject: [PATCH 36/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index cb70cb3c..24e1174d 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1574,11 +1574,11 @@ def _load_ligand_neighbourhood_transforms(ligand_neighbourhood_transforms_yaml): for ligand_transform_key, ligand_transform in dic.items(): print(ligand_transform_key) ligand_1_id, ligand_2_id = ligand_transform_key.split("~") - dtag_1, chain_1, residue_1 = ligand_1_id.split("/") - dtag_2, chain_2, residue_2 = ligand_2_id.split("/") + dtag_1, chain_1, residue_1, version = ligand_1_id.split("/") + dtag_2, chain_2, residue_2, version = ligand_2_id.split("/") ligand_neighbourhood_transforms[( - (dtag_1, chain_1, residue_1), - (dtag_2, chain_2, residue_2) + (dtag_1, chain_1, residue_1, version), + (dtag_2, chain_2, residue_2, version) )] = dt.Transform.from_dict(ligand_transform) return ligand_neighbourhood_transforms From 1727bcc8531ecb2480223581c81ce636e7e688fd Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:28:27 +0000 Subject: [PATCH 37/90] Update using versioned alignment landmarks 
--- src/ligand_neighbourhood_alignment/dt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index f509371e..b5026f0f 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -704,13 +704,13 @@ def from_dict(dic): residues.append((chain, residue, name)) members = [] for member in dic['members']: - dtag, chain, residue = member.split("/") - members.append((dtag, chain, residue)) - ref_dtag, ref_chain, ref_residue = dic["reference_ligand_id"].split("/") + dtag, chain, residue, version = member.split("/") + members.append((dtag, chain, residue, version)) + ref_dtag, ref_chain, ref_residue, version = dic["reference_ligand_id"].split("/") return ConformerSite( residues, members, - (ref_dtag, ref_chain, ref_residue) + (ref_dtag, ref_chain, ref_residue, version) ) def to_dict(self, ): From 0ccb11b5abf29dae5c617101c63d459c5fb9eb0e Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:29:12 +0000 Subject: [PATCH 38/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/dt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index b5026f0f..101ad82b 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -774,8 +774,8 @@ def __init__(self, def from_dict(dic): members = [] for member in dic['members']: - dtag, chain, residue = member.split("/") - members.append((dtag, chain, residue)) + dtag, chain, residue, version = member.split("/") + members.append((dtag, chain, residue, version)) return XtalFormSite( dic['xtalform_id'], dic['crystallographic_chain'], From 068886ed1e1fc4c4665bb427b066c75fb58c2ae2 Mon Sep 17 00:00:00 2001 From: ConorFWild 
<41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:59:37 +0000 Subject: [PATCH 39/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 6 +++--- src/ligand_neighbourhood_alignment/dt.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 24e1174d..9b87f429 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -760,9 +760,9 @@ def _get_connected_components( def _update_conformer_sites( conformer_sites: dict[str, dt.ConformerSite], - connected_component_id: tuple[str, str, str], - connected_component: list[tuple[str, str, str]], - neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood], + connected_component_id: tuple[str, str, str, str], + connected_component: list[tuple[str, str, str, str]], + neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], structures ): matched = False diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 101ad82b..68c93155 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -689,12 +689,12 @@ class ConformerSite: def __init__( self, residues: list[tuple[str, str]], - members: list[tuple[str, str, str]], - reference_ligand_id: tuple[str, str, str] + members: list[tuple[str, str, str, str]], + reference_ligand_id: tuple[str, str, str, str] ): self.residues: list[tuple[str, str]] = residues - self.members: list[tuple[str, str, str]] = members - self.reference_ligand_id: tuple[str, str, str] = reference_ligand_id + self.members: list[tuple[str, str, str, str]] = members + self.reference_ligand_id: tuple[str, str, str, str] = reference_ligand_id @staticmethod def from_dict(dic): From aa6c8bfc737d5cad629341fb05af680acb310672 Mon Sep 17 00:00:00 2001 From: ConorFWild 
<41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:07:44 +0000 Subject: [PATCH 40/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 9b87f429..c56f89b6 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1555,7 +1555,7 @@ def _load_connected_components(connected_components_yaml): for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue, version = ligand_id.split("+") # neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) - connected_components[(dtag, chain, residue)] = [ + connected_components[(dtag, chain, residue, version)] = [ tuple([x for x in _ligand_id.split("+")]) for _ligand_id in neighbourhood_info From a764e1f7374b980f0f833be35bb2b277451aecc3 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 11:26:46 +0000 Subject: [PATCH 41/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index c56f89b6..f8c814c8 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1515,7 +1515,7 @@ def _load_ligand_neighbourhoods(ligand_neighbourhoods_yaml): for ligand_id, neighbourhood_info in dic.items(): dtag, chain, residue, version = ligand_id.split("/") neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) - ligand_neighbourhoods[(dtag, chain, residue)] = neighbourhood + ligand_neighbourhoods[(dtag, chain, residue, version)] = neighbourhood return ligand_neighbourhoods From a04b1e43fd737e00e7efccb43b4fa2bfb119b90a Mon Sep 17 00:00:00 2001 From: ConorFWild 
<41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:01:00 +0000 Subject: [PATCH 42/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index f8c814c8..cd0af93c 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1260,7 +1260,7 @@ def _update( # Check for the matching conformer site conformer_site = None for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue) in conformer_sites[conformer_site_id].members: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: conformer_site = conformer_sites[conformer_site_id] break if conformer_site is None: @@ -1332,12 +1332,12 @@ def _update( # Check for the matching conformer site conformer_site = None for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue) in conformer_sites[conformer_site_id].members: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: conformer_site = conformer_sites[conformer_site_id] break if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") + print(f"Skipping alignment of {dtag} {chain} {residue}Ď€ to site {canonical_site_id}!") continue moving_ligand_id = (dtag, chain, residue) From f3f0af7c66886426ff666675d36b01d81b720b51 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:01:05 +0000 Subject: [PATCH 43/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index cd0af93c..a2a45e4f 100644 --- 
a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1337,7 +1337,7 @@ def _update( break if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue}Ď€ to site {canonical_site_id}!") + print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") continue moving_ligand_id = (dtag, chain, residue) From 5aab7272de85582f8b722f9973bcab8d1dd2b472 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:04:49 +0000 Subject: [PATCH 44/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index a2a45e4f..23507b67 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1332,7 +1332,7 @@ def _update( # Check for the matching conformer site conformer_site = None for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: + if (dtag, chain, residue) in conformer_sites[conformer_site_id].members: conformer_site = conformer_sites[conformer_site_id] break From ff538868c72d1409d1d729ba4c10e39602c31d0a Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:04:55 +0000 Subject: [PATCH 45/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 23507b67..a2a45e4f 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1332,7 +1332,7 @@ def _update( # Check for the matching conformer site conformer_site = None for 
conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue) in conformer_sites[conformer_site_id].members: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: conformer_site = conformer_sites[conformer_site_id] break From 02d349705bdc402135d93164c64ab595723e2967 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:11:15 +0000 Subject: [PATCH 46/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index a2a45e4f..84d5381d 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1266,7 +1266,7 @@ def _update( if conformer_site is None: print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") continue - moving_ligand_id = (dtag, chain, residue) + moving_ligand_id = (dtag, chain, residue, version) reference_ligand_id = conformer_site.reference_ligand_id print(aligned_structure_path) _align_structure( @@ -1340,7 +1340,7 @@ def _update( print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") continue - moving_ligand_id = (dtag, chain, residue) + moving_ligand_id = (dtag, chain, residue, version) reference_ligand_id = conformer_site.reference_ligand_id # print(ligand_neighbourhoods) From 16c8654627fcf8bd77cd331c737e78de3972e514 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:12:09 +0000 Subject: [PATCH 47/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 84d5381d..c04bdaee 100644 --- 
a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1357,7 +1357,7 @@ def _update( xmap = read_xmap(xmap_path) __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue)], + ligand_neighbourhoods[(dtag, chain, residue, version)], alignability_graph, ligand_neighbourhood_transforms, reference_xmap, @@ -1377,7 +1377,7 @@ def _update( if mtz_path != "None": xmap = read_xmap_from_mtz(mtz_path, "2Fo-Fc") __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue)], + ligand_neighbourhoods[(dtag, chain, residue, version)], alignability_graph, ligand_neighbourhood_transforms, reference_xmap, From ef2ac1b31b29538281300fb7df3d63fb55323655 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:12:14 +0000 Subject: [PATCH 48/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index c04bdaee..97b225a0 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1393,7 +1393,7 @@ def _update( ) xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue)], + ligand_neighbourhoods[(dtag, chain, residue, version)], alignability_graph, ligand_neighbourhood_transforms, reference_xmap, From c49fd86197b735b2df6a9fa637a8063d95b29651 Mon Sep 17 00:00:00 2001 From: Tim Dudgeon Date: Mon, 5 Feb 2024 17:53:15 +0000 Subject: [PATCH 49/90] renamed xtalforms to crystalforms in assemblies.yaml --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 97b225a0..7a342a18 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ 
b/src/ligand_neighbourhood_alignment/cli.py @@ -1445,7 +1445,7 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): if xtalforms_file.exists(): with open(xtalforms_file, 'r') as f: - dic = yaml.safe_load(f)['xtalforms'] + dic = yaml.safe_load(f)['crystalforms'] for xtalform_id, xtalform_info in dic.items(): xtalforms[xtalform_id] = dt.XtalForm.from_dict(xtalform_info) @@ -1453,7 +1453,7 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): # Load new info and update if new_xtalforms_yaml.exists(): with open(new_xtalforms_yaml, 'r') as f: - new_xtalforms_dict = yaml.safe_load(f)['xtalforms'] + new_xtalforms_dict = yaml.safe_load(f)['crystalforms'] else: new_xtalforms_dict = {} From cc013eca05dc225b473000f44a736f4b1da5aca3 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Tue, 6 Feb 2024 14:48:29 +0000 Subject: [PATCH 50/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- src/ligand_neighbourhood_alignment/dt.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 97b225a0..7a342a18 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1445,7 +1445,7 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): if xtalforms_file.exists(): with open(xtalforms_file, 'r') as f: - dic = yaml.safe_load(f)['xtalforms'] + dic = yaml.safe_load(f)['crystalforms'] for xtalform_id, xtalform_info in dic.items(): xtalforms[xtalform_id] = dt.XtalForm.from_dict(xtalform_info) @@ -1453,7 +1453,7 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): # Load new info and update if new_xtalforms_yaml.exists(): with open(new_xtalforms_yaml, 'r') as f: - new_xtalforms_dict = yaml.safe_load(f)['xtalforms'] + new_xtalforms_dict = yaml.safe_load(f)['crystalforms'] else: new_xtalforms_dict = {} diff --git 
a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 68c93155..ea4ade7a 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -228,7 +228,7 @@ def from_dict(dic): return FSModel( source_dir=Path(dic["source_dir"]), fs_model=Path(dic['fs_model']), - xtalforms=Path(dic['xtalforms']), + xtalforms=Path(dic['crytalforms']), dataset_assignments=Path(dic['dataset_assignments']), ligand_neighbourhoods=Path(dic['ligand_neighbourhoods']), alignability_graph=Path(dic['alignability_graph']), @@ -269,7 +269,7 @@ def to_dict(self, ): return { 'source_dir': str(self.source_dir), 'fs_model': str(self.fs_model), - 'xtalforms': str(self.xtalforms), + 'crytalforms': str(self.xtalforms), 'dataset_assignments': str(self.dataset_assignments), 'ligand_neighbourhoods': str(self.ligand_neighbourhoods), 'alignability_graph': str(self.alignability_graph), From 0442a47f56826cdf7eced452a33a5205cf1a9be1 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Tue, 6 Feb 2024 16:42:37 +0000 Subject: [PATCH 51/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/get_alignability.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index c21de5f7..be0c8545 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 5, # 10 splits A71, but some things almost identical end up in different clusters + min_alignable_atoms: int = 7, # 10 splits A71, but some things almost identical end up in different clusters max_alignable_rmsd: float = 2.0, ): @@ -137,7 +137,13 @@ def 
_match_cas( ) ) - if len(alignable_cas) >= min_alignable_atoms: + if len(alignable_cas) >= min( + [ + min_alignable_atoms, + len(ligand_1_neighbourhood.atoms), + len(ligand_2_neighbourhood.atoms) + ] + ): sup = gemmi.superpose_positions( [alignable_ca[0] for alignable_ca in alignable_cas], [alignable_ca[1] for alignable_ca in alignable_cas], From aeb9a38f277c6088563c6a8c1fdc4bb15b8b6fd5 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Tue, 6 Feb 2024 16:50:51 +0000 Subject: [PATCH 52/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/dt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index ea4ade7a..e0a97669 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -228,7 +228,7 @@ def from_dict(dic): return FSModel( source_dir=Path(dic["source_dir"]), fs_model=Path(dic['fs_model']), - xtalforms=Path(dic['crytalforms']), + xtalforms=Path(dic['crystalforms']), dataset_assignments=Path(dic['dataset_assignments']), ligand_neighbourhoods=Path(dic['ligand_neighbourhoods']), alignability_graph=Path(dic['alignability_graph']), @@ -269,7 +269,7 @@ def to_dict(self, ): return { 'source_dir': str(self.source_dir), 'fs_model': str(self.fs_model), - 'crytalforms': str(self.xtalforms), + 'crystalforms': str(self.xtalforms), 'dataset_assignments': str(self.dataset_assignments), 'ligand_neighbourhoods': str(self.ligand_neighbourhoods), 'alignability_graph': str(self.alignability_graph), From e43b2efdcb90795d9151cc0051a1e75a57a4ad58 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Tue, 6 Feb 2024 16:56:32 +0000 Subject: [PATCH 53/90] Update using versioned alignment landmarks --- src/ligand_neighbourhood_alignment/get_alignability.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index be0c8545..f1d8b33c 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -113,7 +113,7 @@ def get_alignability( def _match_cas( ligand_1_neighbourhood: dt.Neighbourhood, ligand_2_neighbourhood: dt.Neighbourhood, - min_alignable_atoms: int = 7, # 10 splits A71, but some things almost identical end up in different clusters + min_alignable_atoms: int = 9, # 10 splits A71, but some things almost identical end up in different clusters max_alignable_rmsd: float = 2.0, ): From 5c29fa8f2c703a6e31b8aecbce7da77344018f36 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 8 Feb 2024 14:57:17 +0000 Subject: [PATCH 54/90] Address m2m issue 1280 by ensuring conformer sites in only one canonical site --- src/ligand_neighbourhood_alignment/cli.py | 55 ++++++++++++++--------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 7a342a18..5a26c455 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -13,7 +13,8 @@ import pandas as pd import yaml from loguru import logger -logger.remove() # for someone not familiar with the lib, whats going on here? + +logger.remove() # for someone not familiar with the lib, whats going on here? 
logger.add(sys.stdout, level="INFO") from rich import print @@ -516,8 +517,8 @@ def _generate_assembly( xtalform_assembly.transforms, ): - # for generator in assembly.generators: - # op = gemmi.Op(generator.triplet) + # for generator in assembly.generators: + # op = gemmi.Op(generator.triplet) op = gemmi.Op(_transform) # chain_clone = structure[0][generator.chain].clone() chain_clone = structure[0][_chain].clone() @@ -550,7 +551,7 @@ def _get_structure_fragments(dataset: dt.Dataset, structure, version): for model in structure: for chain in model: source_chain, biomol_chain, transform = chain.name.split("~") - for residue in chain: #.get_ligands(): + for residue in chain: # .get_ligands(): for lbe in dataset.ligand_binding_events: # if ( # (residue.name == "LIG") @@ -561,7 +562,8 @@ def _get_structure_fragments(dataset: dt.Dataset, structure, version): # ligand_id = (dataset.dtag, str(chain.name), str(lbe.residue),) # fragments[ligand_id] = residue # lig_number = lig_number + 1 - if (str(lbe[2]) == str(residue.seqid.num)) & (str(lbe[1]) == str(source_chain)) & (transform == "x,y,z"): + if (str(lbe[2]) == str(residue.seqid.num)) & (str(lbe[1]) == str(source_chain)) & ( + transform == "x,y,z"): ligand_id = (dataset.dtag, str(lbe[1]), str(lbe[2]), str(version)) fragments[ligand_id] = residue @@ -638,6 +640,7 @@ def _save_neighbourhoods( dic["/".join(ligand_id)] = neighbourhood.to_dict() yaml.safe_dump(dic, f) + def _save_ligand_neighbourhood_transforms(fs_model, ligand_neighbourhood_transforms): with open(fs_model.ligand_neighbourhood_transforms, 'w') as f: dic = {} @@ -657,7 +660,6 @@ def _update_graph( ligand_neighbourhoods, ligand_neighbourhood_transforms, ): - nodes = alignability_graph.nodes edges = alignability_graph.edges @@ -721,8 +723,6 @@ def _get_connected_components( target ) - - # degrees = dict(nx.degree(H)) @@ -758,6 +758,7 @@ def _get_connected_components( return clusters + def _update_conformer_sites( conformer_sites: dict[str, dt.ConformerSite], 
connected_component_id: tuple[str, str, str, str], @@ -792,6 +793,7 @@ def _update_conformer_sites( conformer_site_id = "+".join(conformer_site.reference_ligand_id) conformer_sites[conformer_site_id] = conformer_site + def _save_connected_components(fs_model, connected_components): with open(fs_model.connected_components, 'w') as f: dic = {} @@ -803,6 +805,7 @@ def _save_connected_components(fs_model, connected_components): ] yaml.safe_dump(dic, f, sort_keys=False) + def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): with open(fs_model.conformer_sites, 'w') as f: dic = {} @@ -822,16 +825,23 @@ def _update_canonical_sites( else: global_reference_dtag = conformer_site.reference_ligand_id[0] - # Check each canonical site to see if conformer site already in it and if not + # If conformer site already in a canonical site skip + for canonical_site_id, canonical_site in canonical_sites.items(): + if conformer_site_id in canonical_site.conformer_site_ids: + return + + # Check each canonical site to see if conformer site already in it and if not # whether it shares enough residues to now be added matched = False - conformer_site_residues =[(residue[1], residue[2]) for residue in conformer_site.residues] + conformer_site_residues = [(residue[1], residue[2]) for residue in conformer_site.residues] for canonical_site_id, canonical_site in canonical_sites.items(): + if matched: + continue canonical_site_residues = [(residue[1], residue[2]) for residue in canonical_site.residues] if conformer_site_id not in canonical_site.conformer_site_ids: v = set(canonical_site_residues).intersection(set(conformer_site_residues)) # if len(v) >= min(min_shared_residues, int((3/4)*len(canonical_site_residues))): - if len(v) >= 0.75*len(canonical_site_residues): + if len(v) >= 0.75 * len(canonical_site_residues): # Matched! 
matched = True canonical_site.conformer_site_ids.append(conformer_site_id) @@ -951,7 +961,6 @@ def _update_fs_model( conformer_sites: dict[str, dt.ConformerSite], reference_datasets: dict[str, dt.Dataset] ): - # Iterate over canonical sites and their members, checking if they already have an output record and # if not creating one alignments = fs_model.alignments @@ -980,13 +989,13 @@ def _update_fs_model( ) ligand_neighbourhood_output.aligned_artefacts[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR /dtag / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( + fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( dtag=dtag, chain=chain, residue=residue, site=canonical_site_id ) ) ligand_neighbourhood_output.aligned_xmaps[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag /constants.ALIGNED_XMAP_TEMPLATE.format( + fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_XMAP_TEMPLATE.format( dtag=dtag, chain=chain, residue=residue, site=canonical_site_id) @@ -999,9 +1008,8 @@ def _update_fs_model( site=canonical_site_id) ) - ligand_neighbourhood_output.aligned_event_maps[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR /dtag / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( + fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( dtag=dtag, chain=chain, residue=residue, site=canonical_site_id ) ) @@ -1018,15 +1026,16 @@ def _update_fs_model( if canonical_site_id not in reference_alignments[dtag]: reference_alignments[dtag][canonical_site_id] = { - 'aligned_structures': fs_model.source_dir / constants.ALIGNED_FILES_DIR /dtag / constants.ALIGNED_REFERENCE_STRUCTURE_TEMPLATE.format( + 'aligned_structures': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_STRUCTURE_TEMPLATE.format( dtag=dtag, site=canonical_site_id ), - 
'aligned_artefacts': fs_model.source_dir / constants.ALIGNED_FILES_DIR /dtag / constants.ALIGNED_REFERENCE_STRUCTURE_ARTEFACTS_TEMPLATE.format( + 'aligned_artefacts': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_STRUCTURE_ARTEFACTS_TEMPLATE.format( dtag=dtag, site=canonical_site_id ), - 'aligned_xmaps': fs_model.source_dir / constants.ALIGNED_FILES_DIR /dtag / constants.ALIGNED_REFERENCE_XMAP_TEMPLATE.format(dtag=dtag, - site=canonical_site_id), + 'aligned_xmaps': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_XMAP_TEMPLATE.format( + dtag=dtag, + site=canonical_site_id), # 'aligned_event_maps': fs_model.source_dir / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( # dtag=dtag, chain=chain, residue=residue, site=canonical_site_id # ), @@ -1063,7 +1072,8 @@ def _update( # alignment_landmarks: dict[tuple[str,str,str,int], dict[tuple[str, str, str], dt.Atom]], alignability_graph, connected_components, - ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], + ligand_neighbourhood_transforms: dict[ + tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], conformer_sites: dict[str, dt.ConformerSite], conformer_site_transforms: dict[tuple[str, str], dt.Transform], canonical_sites: dict[str, dt.CanonicalSite], @@ -1167,7 +1177,7 @@ def _update( for conformer_site_id, conformer_site in conformer_sites.items(): # If conformer site in a canonical site, replace with new data, otherwise # Check if residues match as usual, otherwise create a new canon site for it - _update_canonical_sites(canonical_sites, conformer_site, conformer_site_id,) + _update_canonical_sites(canonical_sites, conformer_site, conformer_site_id, ) logger.info(f"Now have {len(canonical_sites)} canonical sites") logger.info(f"Global reference dtag is: {list(canonical_sites.values())[0].global_reference_dtag}") _save_canonical_sites(fs_model, 
canonical_sites) @@ -1464,6 +1474,7 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): return xtalforms + def _load_xtalforms_and_assemblies(xtalforms_file, new_xtalforms_yaml): assemblies = {} xtalforms = {} From ef8b5ca4c2d086488077fc226d68b207696fc2d8 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:05:38 +0000 Subject: [PATCH 55/90] Address m2m issue 1280 by ensuring conformer sites in only one canonical site --- .../generate_aligned_structures.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 76f1b0e9..3f279257 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -267,6 +267,7 @@ def _align_structure( _structure, moving_ligand_id: tuple[str, str, str], reference_ligand_id: tuple[str, str, str], + neighbourhood: dt.Neighbourhood, g, neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], conformer_site_transforms: dict[tuple[str, str], dt.Transform], @@ -304,6 +305,13 @@ def _align_structure( _structure = superpose_structure(running_transform, _structure) + # Drop chains without atoms + neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + for _model in _structure: + for _chain in _model: + if _chain.name not in neighbourhood_chains: + _structure.remove_chain(_chain.name) + # Write the fully aligned structure _structure.write_pdb(str(out_path)) From 047110c6723e4307dfd7ec4f59c42301b611572f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:05:56 +0000 Subject: [PATCH 56/90] Removing non-binding chains from aligned output --- .../generate_aligned_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 3f279257..7687bd36 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -305,7 +305,7 @@ def _align_structure( _structure = superpose_structure(running_transform, _structure) - # Drop chains without atoms + # Drop chains without atoms in neighbourhood neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) for _model in _structure: for _chain in _model: From 883c6672c25ef50e3f853902295426683ddb475b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Fri, 16 Feb 2024 15:08:28 +0000 Subject: [PATCH 57/90] Removing non-binding chains from aligned output --- .../generate_sites_from_components.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py index fbb8dc20..7d8aa404 100644 --- a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py +++ b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py @@ -27,7 +27,8 @@ ) # from ligand_neighbourhood_alignment.save_sites import save_sites -from ligand_neighbourhood_alignment.structures import get_structures, get_transform_from_residues, _get_transform_from_residues +from ligand_neighbourhood_alignment.structures import get_structures, get_transform_from_residues, \ + _get_transform_from_residues def get_components(g): @@ -150,9 +151,9 @@ def get_sites_from_conformer_sites(conformer_sites: ConformerSites, neighbourhoo def get_xtalform_sites_from_canonical_sites( - canonical_sites: CanonicalSites, - assigned_xtalforms: AssignedXtalForms, - xtalforms: XtalForms, + canonical_sites: CanonicalSites, + 
assigned_xtalforms: AssignedXtalForms, + xtalforms: XtalForms, ): """ Each canonical site may occur in several forms, depending on the @@ -212,7 +213,6 @@ def get_xtalform_sites_from_canonical_sites( def get_subsite_transforms(sites: CanonicalSites, structures): - transforms = {} for site_id, site in zip(sites.site_ids, sites.sites): rss = site.reference_ligand_id.dtag @@ -227,21 +227,22 @@ def get_subsite_transforms(sites: CanonicalSites, structures): return transforms + from ligand_neighbourhood_alignment import dt + + def _update_conformer_site_transforms( - conformer_site_transforms, - canonical_site: dt.CanonicalSite, - conformer_sites: dict[str, dt.ConformerSite], + conformer_site_transforms, + canonical_site: dt.CanonicalSite, + conformer_sites: dict[str, dt.ConformerSite], structures, - ): - +): ref_conformer_site = conformer_sites[canonical_site.reference_conformer_site_id] ref_conformer_site_residues = ref_conformer_site.residues for conformer_site_id in canonical_site.conformer_site_ids: key = (canonical_site.reference_conformer_site_id, conformer_site_id) if key not in conformer_site_transforms: - conformer_site = conformer_sites[conformer_site_id] # conformer_site_residues = conformer_site.residues @@ -253,8 +254,6 @@ def _update_conformer_site_transforms( conformer_site_transforms[key] = dt.Transform(transform.vec.tolist(), transform.mat.tolist()) - - # transforms = {} # for site_id, site in zip(sites.site_ids, sites.sites): # rss = site.reference_ligand_id.dtag @@ -289,14 +288,15 @@ def get_site_transforms(sites: CanonicalSites, structures): return transforms + def _update_canonical_site_transforms( - canonical_site_transforms: dict[str, dt.Transform], + canonical_site_transforms: dict[str, dt.Transform], canonical_site_id, - canonical_site: dt.CanonicalSite, - # canonical_sites: dict[str, dt.CanonicalSite], + canonical_site: dt.CanonicalSite, + # canonical_sites: dict[str, dt.CanonicalSite], conformer_sites: dict[str, dt.ConformerSite], structures, 
- ): +): rss = structures[canonical_site.global_reference_dtag] ref_site_all_ress = [ (chain.name, res.seqid.num) for model in rss for chain in model for res in chain @@ -311,6 +311,7 @@ def _update_canonical_site_transforms( transform.mat.tolist(), ) + def _update_reference_structure_transforms( reference_structure_transforms, key, @@ -327,8 +328,8 @@ def _update_reference_structure_transforms( transform.mat.tolist(), ) -def _generate_sites_from_components(_source_dir: Path): +def _generate_sites_from_components(_source_dir: Path): logger.info(f"Source dir: {_source_dir}") g = read_graph(_source_dir) neighbourhoods: LigandNeighbourhoods = read_neighbourhoods(_source_dir) @@ -395,4 +396,3 @@ def _generate_sites_from_components(_source_dir: Path): save_site_transforms(site_transforms, _source_dir) return canonical_sites - From 4e208ec01b2e6abaaef3c8a54a7b4efb48a5ca04 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:06:46 +0000 Subject: [PATCH 58/90] Sensible error for issue 1337 --- src/ligand_neighbourhood_alignment/cli.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 5a26c455..ac9cd860 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -521,7 +521,17 @@ def _generate_assembly( # op = gemmi.Op(generator.triplet) op = gemmi.Op(_transform) # chain_clone = structure[0][generator.chain].clone() - chain_clone = structure[0][_chain].clone() + try: + chain_clone = structure[0][_chain].clone() + except Exception as e: + raise Exception( + 'An Exception occurred in generating the biological assemblies\n' + f'Based on the assembly, the expected chains were: {xtalform_assembly.chains}\n' + f'However the chains in the structure were: {[_x.name for _x in structure[0]]}\n' + 'XCA does not currently handle datasets with a mis-match 
between the xtalform chains.\n' + 'You should ensure that the chain names are consistent with the reference dataset for the xtalforms.' + ) + for residue in chain_clone: for atom in residue: From f720240e92cec715a7e9597b22a801396e912b81 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:22:49 +0000 Subject: [PATCH 59/90] Sensible error for issue 1337 --- src/ligand_neighbourhood_alignment/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index ac9cd860..490550cb 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -532,7 +532,6 @@ def _generate_assembly( 'You should ensure that the chain names are consistent with the reference dataset for the xtalforms.' ) - for residue in chain_clone: for atom in residue: atom_frac = structure.cell.fractionalize(atom.pos) From ff41f5d3fc053115248a265bb7bb3926c1d5f16f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:33:46 +0000 Subject: [PATCH 60/90] Sensible error for issue 1337 --- src/ligand_neighbourhood_alignment/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 490550cb..128b3bd0 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1292,6 +1292,7 @@ def _update( _structure, moving_ligand_id, reference_ligand_id, + ligand_neighbourhoods[moving_ligand_id], alignability_graph, ligand_neighbourhood_transforms, conformer_site_transforms, From 9f3757ad0547be581312e5493330540253d04d79 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 21 Feb 2024 10:35:54 +0000 Subject: [PATCH 61/90] Sensible error for issue 1337 --- .../generate_aligned_structures.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 7687bd36..7f03f412 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -310,7 +310,7 @@ def _align_structure( for _model in _structure: for _chain in _model: if _chain.name not in neighbourhood_chains: - _structure.remove_chain(_chain.name) + _model.remove_chain(_chain.name) # Write the fully aligned structure _structure.write_pdb(str(out_path)) From fbefc43dd1512f8fd8ee0ea83b65c1b3f9a51624 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:11:32 +0000 Subject: [PATCH 62/90] Sensible error for issue 1337 --- src/ligand_neighbourhood_alignment/cli.py | 1 + .../generate_aligned_structures.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 128b3bd0..9c501e9c 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1299,6 +1299,7 @@ def _update( # canonical_site_transforms, canonical_site_id, conformer_site_id, + xtalforms[dataset_assignments[dtag]], aligned_structure_path, ) else: diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 7f03f412..3514735d 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -274,6 +274,7 @@ def _align_structure( # canonical_site_transforms: dict[str, dt.Transform], canonical_site_id: str, conformer_site_id: str, + xtalform: dt.XtalForm, out_path: Path, ): shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, 
moving_ligand_id, reference_ligand_id) @@ -306,10 +307,18 @@ def _align_structure( _structure = superpose_structure(running_transform, _structure) # Drop chains without atoms in neighbourhood - neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + # neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + chain_assemblies = { + _chain: _assembly + for _assembly_name, _assembly + in xtalform.assemblies.items() + for _chain in _assembly.chains + } + lig_assembly = chain_assemblies[moving_ligand_id[1]] for _model in _structure: for _chain in _model: - if _chain.name not in neighbourhood_chains: + + if _chain.name not in lig_assembly.chains: # Remove any chain the ligand isn't modelled onto _model.remove_chain(_chain.name) # Write the fully aligned structure From 6f9e582d9079b38d9b71b192e6cffa49a4c9fa15 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:12:24 +0000 Subject: [PATCH 63/90] Output only assembly chains in aligned structures for issue 1360 --- .../generate_aligned_structures.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 3514735d..fd37fd22 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -314,6 +314,7 @@ def _align_structure( in xtalform.assemblies.items() for _chain in _assembly.chains } + lig_assembly = chain_assemblies[moving_ligand_id[1]] for _model in _structure: for _chain in _model: From cf233fd49a2c1e3f4737bc4730d97d51f00486ff Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:13:57 +0000 Subject: [PATCH 64/90] Output only assembly/proximity chains in aligned structures for issue 1360 --- .../generate_aligned_structures.py | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index fd37fd22..359af45b 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -307,7 +307,7 @@ def _align_structure( _structure = superpose_structure(running_transform, _structure) # Drop chains without atoms in neighbourhood - # neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) chain_assemblies = { _chain: _assembly for _assembly_name, _assembly @@ -319,7 +319,8 @@ def _align_structure( for _model in _structure: for _chain in _model: - if _chain.name not in lig_assembly.chains: # Remove any chain the ligand isn't modelled onto + if (_chain.name not in lig_assembly.chains) & (_chain.name not in neighbourhood_chains): # Remove any chain the ligand isn't modelled onto + _model.remove_chain(_chain.name) # Write the fully aligned structure From d42672359f7b80c36f95245c36ab52ca4e6a093b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:55:06 +0000 Subject: [PATCH 65/90] Output only assembly/proximity chains in aligned structures for issue 1360 --- src/ligand_neighbourhood_alignment/cli.py | 45 ++++++++++++++++++++++- src/ligand_neighbourhood_alignment/dt.py | 10 +++-- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 9c501e9c..eb48db02 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -823,10 +823,41 @@ def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.Co yaml.safe_dump(dic, f) +def _get_centroid_res( + residues: 
list[tuple[str, str]], + reference_neighbourhood: dt.Neighbourhood, +): + res_cas = {} + for _residue_id in residues: + for _atom_id, _atom in reference_neighbourhood.atoms.items() + if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == 'CA') + res_cas[_atom_id] = _atom + id_arr = [_atom_id for _atom_id in res_cas] + arr = np.array( + [ + [_atom.x, _atom.y, _atom.z] + for _atom + in res_cas.values() + ] + ) + centroid = np.mean(arr, axis=0) + closest = np.argmin( + np.linalg.norm( + arr-centroid, + axis=1 + ) + ) + closest_atom_id = id_arr[closest] + + return (closest_atom_id[0], closest_atom_id[1]) + + + def _update_canonical_sites( canonical_sites: dict[str, dt.CanonicalSite], conformer_site: dt.ConformerSite, conformer_site_id, + neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], min_shared_residues=6 ): if len(canonical_sites) != 0: @@ -857,11 +888,21 @@ def _update_canonical_sites( # If not matched to any existing canonical site create a new one if not matched: + centroid_res = _get_centroid_res( + conformer_site.residues, + neighbourhoods[conformer_site.reference_ligand_id] + ) canonical_site = dt.CanonicalSite( [conformer_site_id, ], conformer_site.residues, conformer_site_id, - global_reference_dtag + global_reference_dtag, + ( + conformer_site.reference_ligand_id[0], + centroid_res[0], + centroid_res[1], + conformer_site.reference_ligand_id[1] + ) ) canonical_site_id = conformer_site_id canonical_sites[canonical_site_id] = canonical_site @@ -1186,7 +1227,7 @@ def _update( for conformer_site_id, conformer_site in conformer_sites.items(): # If conformer site in a canonical site, replace with new data, otherwise # Check if residues match as usual, otherwise create a new canon site for it - _update_canonical_sites(canonical_sites, conformer_site, conformer_site_id, ) + _update_canonical_sites(canonical_sites, conformer_site, conformer_site_id, ligand_neighbourhoods) logger.info(f"Now have {len(canonical_sites)} 
canonical sites") logger.info(f"Global reference dtag is: {list(canonical_sites.values())[0].global_reference_dtag}") _save_canonical_sites(fs_model, canonical_sites) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index e0a97669..85d21d1d 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -727,12 +727,14 @@ def __init__( conformer_site_ids: list[str], residues: list[tuple[str, str]], reference_conformer_site_id: str, - global_reference_dtag: str + global_reference_dtag: str, + centroid_res: tuple[str,str,str] ): self.conformer_site_ids: list[str] = conformer_site_ids self.residues: list[tuple[str, str]] = residues self.reference_conformer_site_id: str = reference_conformer_site_id self.global_reference_dtag: str = global_reference_dtag + self.centroid_res = centroid_res @staticmethod def from_dict(dic): @@ -745,7 +747,8 @@ def from_dict(dic): dic['conformer_site_ids'], residues, dic['reference_conformer_site_id'], - dic['global_reference_dtag'] + dic['global_reference_dtag'], + dic['centroid_res'].split('/') ) @@ -754,7 +757,8 @@ def to_dict(self): 'conformer_site_ids': self.conformer_site_ids, 'residues': ["/".join(res) for res in self.residues], 'reference_conformer_site_id': self.reference_conformer_site_id, - 'global_reference_dtag': self.global_reference_dtag + 'global_reference_dtag': self.global_reference_dtag, + 'centroid_res': self.centroid_res } From d5a5148b5aec5b21fd9d561756c8755c5cbf233b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:55:50 +0000 Subject: [PATCH 66/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index eb48db02..0bf7a13a 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ 
b/src/ligand_neighbourhood_alignment/cli.py @@ -904,6 +904,8 @@ def _update_canonical_sites( conformer_site.reference_ligand_id[1] ) ) + + canonical_site_id = conformer_site_id canonical_sites[canonical_site_id] = canonical_site From 312ffcc17092e1cb8e27661ea4eb727aa23ae5df Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:57:48 +0000 Subject: [PATCH 67/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/dt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 85d21d1d..9dcd3c89 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -728,7 +728,7 @@ def __init__( residues: list[tuple[str, str]], reference_conformer_site_id: str, global_reference_dtag: str, - centroid_res: tuple[str,str,str] + centroid_res: tuple[str,str,str, str] ): self.conformer_site_ids: list[str] = conformer_site_ids self.residues: list[tuple[str, str]] = residues From 386a9c8779120cea289e365d886617969ad369a4 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 16:59:06 +0000 Subject: [PATCH 68/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 0bf7a13a..24c5eb5b 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -830,7 +830,7 @@ def _get_centroid_res( res_cas = {} for _residue_id in residues: for _atom_id, _atom in reference_neighbourhood.atoms.items() - if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == 'CA') + if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == 
'CA'): res_cas[_atom_id] = _atom id_arr = [_atom_id for _atom_id in res_cas] arr = np.array( From 2bd839d03274c1dacd265de4e90715df5f93ee04 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:00:24 +0000 Subject: [PATCH 69/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 24c5eb5b..17c1c177 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -829,7 +829,7 @@ def _get_centroid_res( ): res_cas = {} for _residue_id in residues: - for _atom_id, _atom in reference_neighbourhood.atoms.items() + for _atom_id, _atom in reference_neighbourhood.atoms.items(): if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == 'CA'): res_cas[_atom_id] = _atom id_arr = [_atom_id for _atom_id in res_cas] From c8a853dd68a5855d13acf7cf34c2e1b478e1bf26 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:04:36 +0000 Subject: [PATCH 70/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/dt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 9dcd3c89..14a47eae 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -758,7 +758,7 @@ def to_dict(self): 'residues': ["/".join(res) for res in self.residues], 'reference_conformer_site_id': self.reference_conformer_site_id, 'global_reference_dtag': self.global_reference_dtag, - 'centroid_res': self.centroid_res + 'centroid_res': '/'.join(self.centroid_res) } From 9614cbb2c1294b8a25ff8bd212e19c66dde41f17 Mon Sep 17 00:00:00 2001 From: ConorFWild 
<41680328+ConorFWild@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:23:11 +0000 Subject: [PATCH 71/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 17c1c177..e2a66165 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -497,6 +497,7 @@ def _generate_assembly( xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], + pdb ): full_st = structure.clone() chains_to_delete = [] @@ -525,7 +526,7 @@ def _generate_assembly( chain_clone = structure[0][_chain].clone() except Exception as e: raise Exception( - 'An Exception occurred in generating the biological assemblies\n' + f'An Exception occurred in generating the biological assemblies for {pdb}\n' f'Based on the assembly, the expected chains were: {xtalform_assembly.chains}\n' f'However the chains in the structure were: {[_x.name for _x in structure[0]]}\n' 'XCA does not currently handle datasets with a mis-match between the xtalform chains.\n' @@ -595,7 +596,7 @@ def _get_dataset_neighbourhoods( logger.debug(f"{structure.cell}") # Get the rest of the assembly - assembly = _generate_assembly(xtalform, structure, assemblies) + assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb) # Get the bound fragments fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments( From 735c4722f1256d105b6ce3fcf1e6546a429aa2c7 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:23:30 +0000 Subject: [PATCH 72/90] New canonical site id to address issue 1326 --- src/ligand_neighbourhood_alignment/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 
e2a66165..4abfa0ed 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -526,7 +526,8 @@ def _generate_assembly( chain_clone = structure[0][_chain].clone() except Exception as e: raise Exception( - f'An Exception occurred in generating the biological assemblies for {pdb}\n' + f'An Exception occurred in generating the biological assemblies for\n' + f'{pdb}\n' f'Based on the assembly, the expected chains were: {xtalform_assembly.chains}\n' f'However the chains in the structure were: {[_x.name for _x in structure[0]]}\n' 'XCA does not currently handle datasets with a mis-match between the xtalform chains.\n' From 71111901a23ac61006223c55b990d4f289c9e151 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:56:47 +0100 Subject: [PATCH 73/90] New approach to treat version differently in fs model --- .../alignment_heirarchy.py | 10 + src/ligand_neighbourhood_alignment/cli.py | 262 +++++++++--------- .../constants.py | 10 +- .../generate_sites_from_components.py | 4 +- 4 files changed, 150 insertions(+), 136 deletions(-) create mode 100644 src/ligand_neighbourhood_alignment/alignment_heirarchy.py diff --git a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py new file mode 100644 index 00000000..905fef6d --- /dev/null +++ b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py @@ -0,0 +1,10 @@ +from ligand_neighbourhood_alignment import dt + +def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]): + # The Alignment hierarchy is the graph of alignments one must perform in order to get from + # a ligand canonical site to the Reference Assembly Frame + + # + + + ... 
\ No newline at end of file diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 4abfa0ed..bdb8d4fb 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -890,7 +890,7 @@ def _update_canonical_sites( # If not matched to any existing canonical site create a new one if not matched: - centroid_res = _get_centroid_res( + centroid_res = _get_centroid_res( conformer_site.residues, neighbourhoods[conformer_site.reference_ligand_id] ) @@ -1028,9 +1028,11 @@ def _update_fs_model( if chain not in alignments[dtag]: alignments[dtag][chain] = {} if residue not in alignments[dtag][chain]: - alignments[dtag][chain][residue] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) + alignments[dtag][chain][version] = {} + if version not in alignments[dtag][chain][residue][version]: + alignments[dtag][chain][residue][version] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) - ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue] + ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue][version] if not (fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag).exists(): os.mkdir(fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag) @@ -1038,33 +1040,33 @@ def _update_fs_model( if canonical_site_id not in ligand_neighbourhood_output.aligned_structures: ligand_neighbourhood_output.aligned_structures[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id ) ) ligand_neighbourhood_output.aligned_artefacts[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, 
site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version,site=canonical_site_id ) ) ligand_neighbourhood_output.aligned_xmaps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_XMAP_TEMPLATE.format( dtag=dtag, chain=chain, - residue=residue, + residue=residue,version=version, site=canonical_site_id) ) ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_DIFF_TEMPLATE.format( dtag=dtag, chain=chain, - residue=residue, + residue=residue, version=version, site=canonical_site_id) ) ligand_neighbourhood_output.aligned_event_maps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id ) ) @@ -1315,40 +1317,41 @@ def _update( # for lid in conformer_site.ligand_ids: for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): - for residue, ligand_neighbourhood_output in chain_alignment_info.items(): - for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): - if not Path(aligned_structure_path).exists(): - # _update_aligned_structures() - _structure = structures[dtag].clone() - canonical_site = canonical_sites[canonical_site_id] - # Check for the matching conformer site - conformer_site = None - for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: - conformer_site = conformer_sites[conformer_site_id] - break - if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") - continue - moving_ligand_id = (dtag, chain, residue, version) 
- reference_ligand_id = conformer_site.reference_ligand_id - print(aligned_structure_path) - _align_structure( - _structure, - moving_ligand_id, - reference_ligand_id, - ligand_neighbourhoods[moving_ligand_id], - alignability_graph, - ligand_neighbourhood_transforms, - conformer_site_transforms, - # canonical_site_transforms, - canonical_site_id, - conformer_site_id, - xtalforms[dataset_assignments[dtag]], - aligned_structure_path, - ) - else: - logger.info(f"Already output structure!") + for residue, residue_alignment_info in chain_alignment_info.items(): + for version, ligand_neighbourhood_output in residue_alignment_info.items(): + for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): + if not Path(aligned_structure_path).exists(): + # _update_aligned_structures() + _structure = structures[dtag].clone() + canonical_site = canonical_sites[canonical_site_id] + # Check for the matching conformer site + conformer_site = None + for conformer_site_id in canonical_site.conformer_site_ids: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: + conformer_site = conformer_sites[conformer_site_id] + break + if conformer_site is None: + print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") + continue + moving_ligand_id = (dtag, chain, residue, version) + reference_ligand_id = conformer_site.reference_ligand_id + print(aligned_structure_path) + _align_structure( + _structure, + moving_ligand_id, + reference_ligand_id, + ligand_neighbourhoods[moving_ligand_id], + alignability_graph, + ligand_neighbourhood_transforms, + conformer_site_transforms, + # canonical_site_transforms, + canonical_site_id, + conformer_site_id, + xtalforms[dataset_assignments[dtag]], + aligned_structure_path, + ) + else: + logger.info(f"Already output structure!") # Generate alignments of references to each canonical site # for canonical_site_id, canonical_site in canonical_sites.items(): @@ 
-1389,93 +1392,94 @@ def _update( logger.info(f"Outputting xmaps...") for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): - for residue, ligand_neighbourhood_output in chain_alignment_info.items(): - for canonical_site_id, aligned_event_map_path in ligand_neighbourhood_output.aligned_event_maps.items(): - logger.info(f"Writing to: {aligned_event_map_path}") - if not Path(aligned_event_map_path).exists(): - _structure = structures[dtag].clone() - canonical_site = canonical_sites[canonical_site_id] - # Check for the matching conformer site - conformer_site = None - for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: - conformer_site = conformer_sites[conformer_site_id] - break - - if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") - continue - - moving_ligand_id = (dtag, chain, residue, version) - reference_ligand_id = conformer_site.reference_ligand_id - # print(ligand_neighbourhoods) - - xmap_path = datasets[dtag].ligand_binding_events[(dtag, chain, residue)].xmap - - aligned_structure_path = ligand_neighbourhood_output.aligned_structures[canonical_site_id] - aligned_structure = gemmi.read_structure(str(aligned_structure_path)) - aligned_res = aligned_structure[0][chain][str(residue)][0] - - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].dtag) - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].chain) - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].residue) # * - if (xmap_path != "None") and (xmap_path is not None): - xmap = read_xmap(xmap_path) - - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - 
conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - aligned_event_map_path, - aligned_res - ) - mtz_path = datasets[dtag].mtz - # print(f"Mtz path: {mtz_path}") - # raise Exception - if mtz_path != "None": - xmap = read_xmap_from_mtz(mtz_path, "2Fo-Fc") - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - ligand_neighbourhood_output.aligned_xmaps[canonical_site_id], - aligned_res - ) - xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id], - aligned_res - ) - - else: - logger.info(f"Already output xmap!") + for residue, residue_alignment_info in chain_alignment_info.items(): + for version, ligand_neighbourhood_output in residue_alignment_info.items(): + for canonical_site_id, aligned_event_map_path in ligand_neighbourhood_output.aligned_event_maps.items(): + logger.info(f"Writing to: {aligned_event_map_path}") + if not Path(aligned_event_map_path).exists(): + _structure = structures[dtag].clone() + canonical_site = canonical_sites[canonical_site_id] + # Check for the matching conformer site + conformer_site = None + for conformer_site_id in canonical_site.conformer_site_ids: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: + conformer_site = conformer_sites[conformer_site_id] + break + + if conformer_site is None: + print(f"Skipping alignment of {dtag} {chain} {residue} to site 
{canonical_site_id}!") + continue + + moving_ligand_id = (dtag, chain, residue, version) + reference_ligand_id = conformer_site.reference_ligand_id + # print(ligand_neighbourhoods) + + xmap_path = datasets[dtag].ligand_binding_events[(dtag, chain, residue)].xmap + + aligned_structure_path = ligand_neighbourhood_output.aligned_structures[canonical_site_id] + aligned_structure = gemmi.read_structure(str(aligned_structure_path)) + aligned_res = aligned_structure[0][chain][str(residue)][0] + + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].dtag) + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].chain) + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].residue) # * + if (xmap_path != "None") and (xmap_path is not None): + xmap = read_xmap(xmap_path) + + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + aligned_event_map_path, + aligned_res + ) + mtz_path = datasets[dtag].mtz + # print(f"Mtz path: {mtz_path}") + # raise Exception + if mtz_path != "None": + xmap = read_xmap_from_mtz(mtz_path, "2Fo-Fc") + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + ligand_neighbourhood_output.aligned_xmaps[canonical_site_id], + aligned_res + ) + xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + 
conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id], + aligned_res + ) + + else: + logger.info(f"Already output xmap!") return fs_model diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index 5a0aa4ba..97ab9da2 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -33,11 +33,11 @@ MODEL_DIR_MTZ: str = "refine.mtz" OUTPUT_JSON_PATH: str = "output.json" -ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}.pdb" -ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_artefacts.pdb" -ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_2FoFc.ccp4" -ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_FoFc.ccp4" -ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_event.ccp4" +ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}.pdb" +ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_artefacts.pdb" +ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_2FoFc.ccp4" +ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_FoFc.ccp4" +ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_{}_event.ccp4" FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" diff --git a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py index 7d8aa404..6337c76b 100644 --- a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py +++ b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py @@ -247,10 +247,10 @@ def _update_conformer_site_transforms( # conformer_site_residues = conformer_site.residues transform = _get_transform_from_residues( - [(x[0], x[1]) for x in 
canonical_site.residues], structures[conformer_site.reference_ligand_id[0]], - structures[ref_conformer_site.reference_ligand_id[0]]) + structures[ref_conformer_site.reference_ligand_id[0]], + ) conformer_site_transforms[key] = dt.Transform(transform.vec.tolist(), transform.mat.tolist()) From 20756f6f2e1c0a4f7f61d840863fb2f538fc29a6 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:59:33 +0100 Subject: [PATCH 74/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/dt.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 14a47eae..8ddefd2c 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -201,10 +201,13 @@ def from_dict(dic): alignments[dtag] = {} for chain, chain_alignments in dataset_alignments.items(): alignments[dtag][chain] = {} - for residue, ligand_neighbourhood_alignments in chain_alignments.items(): - # _dtag, _chain, _residue = ligand_neighbourhood.split("/") - alignments[dtag][chain][residue] = LigandNeighbourhoodOutput.from_dict( - ligand_neighbourhood_alignments, source_dir) + for residue, residue_alignments in chain_alignments.items(): + alignments[dtag][chain][residue] = {} + + for version, ligand_neighbourhood_alignments in residue_alignments.items(): + # _dtag, _chain, _residue = ligand_neighbourhood.split("/") + alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.from_dict( + ligand_neighbourhood_alignments, source_dir) # reference_alignments = {} # for dtag, dataset_alignments in alignments["reference_alignments"].items(): @@ -251,9 +254,10 @@ def to_dict(self, ): alignments[dtag] = {} for chain, chain_alignments in dataset_alignments.items(): alignments[dtag][chain] = {} - for residue, ligand_neighbourhood_alignments in chain_alignments.items(): 
- # print(residue) - alignments[dtag][chain][residue] = LigandNeighbourhoodOutput.to_dict( + for residue, residue_alignments in chain_alignments.items(): + alignments[dtag][chain][residue] = {} + for version, ligand_neighbourhood_alignments in residue_alignments.items(): + alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.to_dict( ligand_neighbourhood_alignments) reference_alignments = {} From 28cc0c6abf8950f44ef2b9df98c1a1fc3e703874 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:32:37 +0100 Subject: [PATCH 75/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index bdb8d4fb..7632ce4b 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1028,8 +1028,8 @@ def _update_fs_model( if chain not in alignments[dtag]: alignments[dtag][chain] = {} if residue not in alignments[dtag][chain]: - alignments[dtag][chain][version] = {} - if version not in alignments[dtag][chain][residue][version]: + alignments[dtag][chain][residue] = {} + if version not in alignments[dtag][chain][residue]: alignments[dtag][chain][residue][version] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue][version] From 9af870fc089b94447502ec4a8ec6661499b0fa14 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:34:52 +0100 Subject: [PATCH 76/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py 
index 97ab9da2..193a5601 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -37,7 +37,7 @@ ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_artefacts.pdb" ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_2FoFc.ccp4" ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_FoFc.ccp4" -ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_{}_event.ccp4" +ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_event.ccp4" FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" From 3cb50e0a29590492c29c35b0616d19140ce0b9a1 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:36:40 +0100 Subject: [PATCH 77/90] New approach to treat version differently in fs model --- .../alignment_heirarchy.py | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py index 905fef6d..4f0a23a0 100644 --- a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py +++ b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py @@ -1,10 +1,69 @@ from ligand_neighbourhood_alignment import dt -def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]): +AlignmentHeirarchy: dict[str, tuple[str, str]] + +def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]) -> AlignmentHeirarchy: # The Alignment hierarchy is the graph of alignments one must perform in order to get from # a ligand canonical site to the Reference Assembly Frame - # + # In order to calculate the assembly the following steps are performed: + # 1. Determine the Assembly priority + # 2. Determine the Chain priority + # 3. Find each assembly's reference + # 4. Check per-chain RMSDs and warn if any are high + + # 1. 
Determine the Assembly priority + assembly_priority = {_j: _assembly_name for _j, _assembly_name in enumerate(assemblies)} + + # 2. Determine the Chain priority and map assembly names to chains + chain_priority = {} + assembly_chains = {} + chain_priority_count = 0 + for _j, _assembly_name in assembly_priority.items(): + assembly = assemblies[_assembly_name] + assembly_chains[_assembly_name] = [] + for _generator in assembly.generators: + _biological_chain_name = _generator.biomol + assembly_chains[_assembly_name].append(_biological_chain_name) + if _biological_chain_name not in chain_priority.values(): + chain_priority[chain_priority_count] = _biological_chain_name + chain_priority_count += 1 + + # 3. Find each assembly's reference + reference_assemblies = {} + for _assembly_name, _assembly in assemblies.items(): + # Get the highest priority chain + reference_chain = min( + [_generator.chain for _generator in _assembly.generators], + key= lambda _x: chain_priority[_x] + ) + + # Get the highest priority assembly in which it occurs + reference_assembly = min( + [_assembly_name for _assembly_name in assembly_chains if reference_chain in assembly_chains[_assembly_name]], + key= lambda _x: assembly_priority[_x] + ) + reference_assemblies[_assembly_name] = (reference_assembly, reference_chain) + + # 4. 
Check per-chain RMSDs and warn if any are high + # TODO + + return reference_assemblies + +def _chain_to_biochain(chain_name, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly]) -> str: + for _xtal_assembly_name, _xtal_assembly in xtalform.assemblies.items(): + for _j, _chain_name in enumerate(_xtal_assembly.chains): + if chain_name == _chain_name: + return assemblies[_xtal_assembly.assembly].generators[_j].biomol + + +StructureLandmarks: dict[tuple[str, str, str], tuple[float, float, float]] +def _calculate_assembly_transform( + assembly_name: str, + alignment_heirarchy: AlignmentHeirarchy, + assembly_landmarks: dict[str, StructureLandmarks] +): + # Get the chain to align to ... \ No newline at end of file From afc3ae821493165222c130b58ce7ae01cd99906f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:56:47 +0100 Subject: [PATCH 78/90] New approach to treat version differently in fs model --- .../alignment_heirarchy.py | 10 + src/ligand_neighbourhood_alignment/cli.py | 262 +++++++++--------- .../constants.py | 10 +- .../generate_sites_from_components.py | 4 +- 4 files changed, 150 insertions(+), 136 deletions(-) create mode 100644 src/ligand_neighbourhood_alignment/alignment_heirarchy.py diff --git a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py new file mode 100644 index 00000000..905fef6d --- /dev/null +++ b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py @@ -0,0 +1,10 @@ +from ligand_neighbourhood_alignment import dt + +def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]): + # The Alignment hierarchy is the graph of alignments one must perform in order to get from + # a ligand canonical site to the Reference Assembly Frame + + # + + + ... 
\ No newline at end of file diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 4abfa0ed..bdb8d4fb 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -890,7 +890,7 @@ def _update_canonical_sites( # If not matched to any existing canonical site create a new one if not matched: - centroid_res = _get_centroid_res( + centroid_res = _get_centroid_res( conformer_site.residues, neighbourhoods[conformer_site.reference_ligand_id] ) @@ -1028,9 +1028,11 @@ def _update_fs_model( if chain not in alignments[dtag]: alignments[dtag][chain] = {} if residue not in alignments[dtag][chain]: - alignments[dtag][chain][residue] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) + alignments[dtag][chain][version] = {} + if version not in alignments[dtag][chain][residue][version]: + alignments[dtag][chain][residue][version] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) - ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue] + ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue][version] if not (fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag).exists(): os.mkdir(fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag) @@ -1038,33 +1040,33 @@ def _update_fs_model( if canonical_site_id not in ligand_neighbourhood_output.aligned_structures: ligand_neighbourhood_output.aligned_structures[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id ) ) ligand_neighbourhood_output.aligned_artefacts[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, 
site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version,site=canonical_site_id ) ) ligand_neighbourhood_output.aligned_xmaps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_XMAP_TEMPLATE.format( dtag=dtag, chain=chain, - residue=residue, + residue=residue,version=version, site=canonical_site_id) ) ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_DIFF_TEMPLATE.format( dtag=dtag, chain=chain, - residue=residue, + residue=residue, version=version, site=canonical_site_id) ) ligand_neighbourhood_output.aligned_event_maps[canonical_site_id] = ( fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=canonical_site_id + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id ) ) @@ -1315,40 +1317,41 @@ def _update( # for lid in conformer_site.ligand_ids: for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): - for residue, ligand_neighbourhood_output in chain_alignment_info.items(): - for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): - if not Path(aligned_structure_path).exists(): - # _update_aligned_structures() - _structure = structures[dtag].clone() - canonical_site = canonical_sites[canonical_site_id] - # Check for the matching conformer site - conformer_site = None - for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: - conformer_site = conformer_sites[conformer_site_id] - break - if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") - continue - moving_ligand_id = (dtag, chain, residue, version) 
- reference_ligand_id = conformer_site.reference_ligand_id - print(aligned_structure_path) - _align_structure( - _structure, - moving_ligand_id, - reference_ligand_id, - ligand_neighbourhoods[moving_ligand_id], - alignability_graph, - ligand_neighbourhood_transforms, - conformer_site_transforms, - # canonical_site_transforms, - canonical_site_id, - conformer_site_id, - xtalforms[dataset_assignments[dtag]], - aligned_structure_path, - ) - else: - logger.info(f"Already output structure!") + for residue, residue_alignment_info in chain_alignment_info.items(): + for version, ligand_neighbourhood_output in residue_alignment_info.items(): + for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): + if not Path(aligned_structure_path).exists(): + # _update_aligned_structures() + _structure = structures[dtag].clone() + canonical_site = canonical_sites[canonical_site_id] + # Check for the matching conformer site + conformer_site = None + for conformer_site_id in canonical_site.conformer_site_ids: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: + conformer_site = conformer_sites[conformer_site_id] + break + if conformer_site is None: + print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") + continue + moving_ligand_id = (dtag, chain, residue, version) + reference_ligand_id = conformer_site.reference_ligand_id + print(aligned_structure_path) + _align_structure( + _structure, + moving_ligand_id, + reference_ligand_id, + ligand_neighbourhoods[moving_ligand_id], + alignability_graph, + ligand_neighbourhood_transforms, + conformer_site_transforms, + # canonical_site_transforms, + canonical_site_id, + conformer_site_id, + xtalforms[dataset_assignments[dtag]], + aligned_structure_path, + ) + else: + logger.info(f"Already output structure!") # Generate alignments of references to each canonical site # for canonical_site_id, canonical_site in canonical_sites.items(): @@ 
-1389,93 +1392,94 @@ def _update( logger.info(f"Outputting xmaps...") for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): - for residue, ligand_neighbourhood_output in chain_alignment_info.items(): - for canonical_site_id, aligned_event_map_path in ligand_neighbourhood_output.aligned_event_maps.items(): - logger.info(f"Writing to: {aligned_event_map_path}") - if not Path(aligned_event_map_path).exists(): - _structure = structures[dtag].clone() - canonical_site = canonical_sites[canonical_site_id] - # Check for the matching conformer site - conformer_site = None - for conformer_site_id in canonical_site.conformer_site_ids: - if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: - conformer_site = conformer_sites[conformer_site_id] - break - - if conformer_site is None: - print(f"Skipping alignment of {dtag} {chain} {residue} to site {canonical_site_id}!") - continue - - moving_ligand_id = (dtag, chain, residue, version) - reference_ligand_id = conformer_site.reference_ligand_id - # print(ligand_neighbourhoods) - - xmap_path = datasets[dtag].ligand_binding_events[(dtag, chain, residue)].xmap - - aligned_structure_path = ligand_neighbourhood_output.aligned_structures[canonical_site_id] - aligned_structure = gemmi.read_structure(str(aligned_structure_path)) - aligned_res = aligned_structure[0][chain][str(residue)][0] - - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].dtag) - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].chain) - # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].residue) # * - if (xmap_path != "None") and (xmap_path is not None): - xmap = read_xmap(xmap_path) - - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - 
conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - aligned_event_map_path, - aligned_res - ) - mtz_path = datasets[dtag].mtz - # print(f"Mtz path: {mtz_path}") - # raise Exception - if mtz_path != "None": - xmap = read_xmap_from_mtz(mtz_path, "2Fo-Fc") - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - ligand_neighbourhood_output.aligned_xmaps[canonical_site_id], - aligned_res - ) - xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") - __align_xmap( - ligand_neighbourhoods[(dtag, chain, residue, version)], - alignability_graph, - ligand_neighbourhood_transforms, - reference_xmap, - reference_ligand_id, - moving_ligand_id, - xmap, - conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id], - aligned_res - ) - - else: - logger.info(f"Already output xmap!") + for residue, residue_alignment_info in chain_alignment_info.items(): + for version, ligand_neighbourhood_output in residue_alignment_info.items(): + for canonical_site_id, aligned_event_map_path in ligand_neighbourhood_output.aligned_event_maps.items(): + logger.info(f"Writing to: {aligned_event_map_path}") + if not Path(aligned_event_map_path).exists(): + _structure = structures[dtag].clone() + canonical_site = canonical_sites[canonical_site_id] + # Check for the matching conformer site + conformer_site = None + for conformer_site_id in canonical_site.conformer_site_ids: + if (dtag, chain, residue, version) in conformer_sites[conformer_site_id].members: + conformer_site = conformer_sites[conformer_site_id] + break + + if conformer_site is None: + print(f"Skipping alignment of {dtag} {chain} {residue} to site 
{canonical_site_id}!") + continue + + moving_ligand_id = (dtag, chain, residue, version) + reference_ligand_id = conformer_site.reference_ligand_id + # print(ligand_neighbourhoods) + + xmap_path = datasets[dtag].ligand_binding_events[(dtag, chain, residue)].xmap + + aligned_structure_path = ligand_neighbourhood_output.aligned_structures[canonical_site_id] + aligned_structure = gemmi.read_structure(str(aligned_structure_path)) + aligned_res = aligned_structure[0][chain][str(residue)][0] + + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].dtag) + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].chain) + # logger.info(datasets[dtag].ligand_binding_events[(dtag, chain, residue)].residue) # * + if (xmap_path != "None") and (xmap_path is not None): + xmap = read_xmap(xmap_path) + + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + aligned_event_map_path, + aligned_res + ) + mtz_path = datasets[dtag].mtz + # print(f"Mtz path: {mtz_path}") + # raise Exception + if mtz_path != "None": + xmap = read_xmap_from_mtz(mtz_path, "2Fo-Fc") + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + ligand_neighbourhood_output.aligned_xmaps[canonical_site_id], + aligned_res + ) + xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") + __align_xmap( + ligand_neighbourhoods[(dtag, chain, residue, version)], + alignability_graph, + ligand_neighbourhood_transforms, + reference_xmap, + reference_ligand_id, + moving_ligand_id, + xmap, + 
conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id], + aligned_res + ) + + else: + logger.info(f"Already output xmap!") return fs_model diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index 73e865b8..568e4a3f 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -33,11 +33,11 @@ MODEL_DIR_MTZ: str = "refine.mtz" OUTPUT_JSON_PATH: str = "output.json" -ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}.pdb" -ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_artefacts.pdb" -ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_sigmaa.ccp4" -ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_diff.ccp4" -ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{site}_event.ccp4" +ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}.pdb" +ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_artefacts.pdb" +ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_sigmaa.ccp4" +ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_diff.ccp4" +ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_{}_event.ccp4" FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" diff --git a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py index 7d8aa404..6337c76b 100644 --- a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py +++ b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py @@ -247,10 +247,10 @@ def _update_conformer_site_transforms( # conformer_site_residues = conformer_site.residues transform = _get_transform_from_residues( - [(x[0], x[1]) for x in 
canonical_site.residues], structures[conformer_site.reference_ligand_id[0]], - structures[ref_conformer_site.reference_ligand_id[0]]) + structures[ref_conformer_site.reference_ligand_id[0]], + ) conformer_site_transforms[key] = dt.Transform(transform.vec.tolist(), transform.mat.tolist()) From a164475541ea519a24153514528501201b70736b Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:59:33 +0100 Subject: [PATCH 79/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/dt.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 14a47eae..8ddefd2c 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -201,10 +201,13 @@ def from_dict(dic): alignments[dtag] = {} for chain, chain_alignments in dataset_alignments.items(): alignments[dtag][chain] = {} - for residue, ligand_neighbourhood_alignments in chain_alignments.items(): - # _dtag, _chain, _residue = ligand_neighbourhood.split("/") - alignments[dtag][chain][residue] = LigandNeighbourhoodOutput.from_dict( - ligand_neighbourhood_alignments, source_dir) + for residue, residue_alignments in chain_alignments.items(): + alignments[dtag][chain][residue] = {} + + for version, ligand_neighbourhood_alignments in residue_alignments.items(): + # _dtag, _chain, _residue = ligand_neighbourhood.split("/") + alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.from_dict( + ligand_neighbourhood_alignments, source_dir) # reference_alignments = {} # for dtag, dataset_alignments in alignments["reference_alignments"].items(): @@ -251,9 +254,10 @@ def to_dict(self, ): alignments[dtag] = {} for chain, chain_alignments in dataset_alignments.items(): alignments[dtag][chain] = {} - for residue, ligand_neighbourhood_alignments in chain_alignments.items(): 
- # print(residue) - alignments[dtag][chain][residue] = LigandNeighbourhoodOutput.to_dict( + for residue, residue_alignments in chain_alignments.items(): + alignments[dtag][chain][residue] = {} + for version, ligand_neighbourhood_alignments in residue_alignments.items(): + alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.to_dict( ligand_neighbourhood_alignments) reference_alignments = {} From 6d19d127a2257dac7918a2bf4cd388c22d54d9a1 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:32:37 +0100 Subject: [PATCH 80/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index bdb8d4fb..7632ce4b 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -1028,8 +1028,8 @@ def _update_fs_model( if chain not in alignments[dtag]: alignments[dtag][chain] = {} if residue not in alignments[dtag][chain]: - alignments[dtag][chain][version] = {} - if version not in alignments[dtag][chain][residue][version]: + alignments[dtag][chain][residue] = {} + if version not in alignments[dtag][chain][residue]: alignments[dtag][chain][residue][version] = dt.LigandNeighbourhoodOutput({}, {}, {}, {}, {}) ligand_neighbourhood_output: dt.LigandNeighbourhoodOutput = alignments[dtag][chain][residue][version] From 9b6c679a629a1bee4456741b0b8b60b129c13d57 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 17 Apr 2024 17:34:52 +0100 Subject: [PATCH 81/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/constants.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/constants.py 
b/src/ligand_neighbourhood_alignment/constants.py index 568e4a3f..193a5601 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -35,9 +35,9 @@ OUTPUT_JSON_PATH: str = "output.json" ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}.pdb" ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_artefacts.pdb" -ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_sigmaa.ccp4" -ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_diff.ccp4" -ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_{}_event.ccp4" +ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_2FoFc.ccp4" +ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_FoFc.ccp4" +ALIGNED_EVENT_MAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_event.ccp4" FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" From e61f54c31eb704b8582fb13b01ee44104d3ee947 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:36:40 +0100 Subject: [PATCH 82/90] New approach to treat version differently in fs model --- .../alignment_heirarchy.py | 63 ++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py index 905fef6d..4f0a23a0 100644 --- a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py +++ b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py @@ -1,10 +1,69 @@ from ligand_neighbourhood_alignment import dt -def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]): +AlignmentHeirarchy: dict[str, tuple[str, str]] + +def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]) -> AlignmentHeirarchy: # The Alignment hierarchy is the graph of alignments one must perform in order to 
get from # a ligand canonical site to the Reference Assembly Frame - # + # In order to calculate the assembly the following steps are performed: + # 1. Determine the Assembly priority + # 2. Determine the Chain priority + # 3. Find each assembly's reference + # 4. Check per-chain RMSDs and warn if any are high + + # 1. Determine the Assembly priority + assembly_priority = {_j: _assembly_name for _j, _assembly_name in enumerate(assemblies)} + + # 2. Determine the Chain priority and map assembly names to chains + chain_priority = {} + assembly_chains = {} + chain_priority_count = 0 + for _j, _assembly_name in assembly_priority.items(): + assembly = assemblies[_assembly_name] + assembly_chains[_assembly_name] = [] + for _generator in assembly.generators: + _biological_chain_name = _generator.biomol + assembly_chains[_assembly_name].append(_biological_chain_name) + if _biological_chain_name not in chain_priority.values(): + chain_priority[chain_priority_count] = _biological_chain_name + chain_priority_count += 1 + + # 3. Find each assembly's reference + reference_assemblies = {} + for _assembly_name, _assembly in assemblies.items(): + # Get the highest priority chain + reference_chain = min( + [_generator.chain for _generator in _assembly.generators], + key= lambda _x: chain_priority[_x] + ) + + # Get the highest priority assembly in which it occurs + reference_assembly = min( + [_assembly_name for _assembly_name in assembly_chains if reference_chain in assembly_chains[_assembly_name]], + key= lambda _x: assembly_priority[_x] + ) + reference_assemblies[_assembly_name] = (reference_assembly, reference_chain) + + # 4. 
Check per-chain RMSDs and warn if any are high + # TODO + + return reference_assemblies + +def _chain_to_biochain(chain_name, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly]) -> str: + for _xtal_assembly_name, _xtal_assembly in xtalform.assemblies.items(): + for _j, _chain_name in enumerate(_xtal_assembly.chains): + if chain_name == _chain_name: + return assemblies[_xtal_assembly.assembly].generators[_j].biomol + + +StructureLandmarks: dict[tuple[str, str, str], tuple[float, float, float]] +def _calculate_assembly_transform( + assembly_name: str, + alignment_heirarchy: AlignmentHeirarchy, + assembly_landmarks: dict[str, StructureLandmarks] +): + # Get the chain to align to ... \ No newline at end of file From 9c3898214cb7a825436128a67e7ee89bf99d5070 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:44:31 +0100 Subject: [PATCH 83/90] New approach to treat version differently in fs model --- src/ligand_neighbourhood_alignment/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index 193a5601..9b6fae7c 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -35,8 +35,8 @@ OUTPUT_JSON_PATH: str = "output.json" ALIGNED_STRUCTURE_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}.pdb" ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_artefacts.pdb" -ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_2FoFc.ccp4" -ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_FoFc.ccp4" +ALIGNED_XMAP_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_sigmaa.ccp4" +ALIGNED_DIFF_TEMPLATE: str = "{dtag}_{chain}_{residue}_{version}_{site}_diff.ccp4" ALIGNED_EVENT_MAP_TEMPLATE: str = 
"{dtag}_{chain}_{residue}_{version}_{site}_event.ccp4" From b136a7dc9144c68ccb80de284211f6ffbb68537a Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Wed, 24 Apr 2024 11:48:57 +0100 Subject: [PATCH 84/90] New approach to treat version differently in fs model --- .github/pages/make_switcher.py | 9 +- .../align_xmaps.py | 137 ++-- .../alignment_heirarchy.py | 21 +- src/ligand_neighbourhood_alignment/cli.py | 616 +++++++++--------- .../constants.py | 4 +- src/ligand_neighbourhood_alignment/data.py | 5 - src/ligand_neighbourhood_alignment/dt.py | 482 ++++++-------- .../generate_aligned_structures.py | 118 ++-- .../generate_sites_from_components.py | 47 +- .../get_alignability.py | 59 +- .../get_alignable_sites.py | 18 +- .../get_canonical_sites.py | 12 +- .../get_ligand_neighbourhoods.py | 47 +- .../get_transforms.py | 4 +- .../make_data_json.py | 71 +- .../matching.py | 14 +- .../save_neighbourhoods.py | 4 +- .../structures.py | 3 +- tests/test_boilerplate_removed.py | 3 +- 19 files changed, 750 insertions(+), 924 deletions(-) diff --git a/.github/pages/make_switcher.py b/.github/pages/make_switcher.py index 39c12772..fffb50bb 100755 --- a/.github/pages/make_switcher.py +++ b/.github/pages/make_switcher.py @@ -58,19 +58,14 @@ def get_versions(ref: str, add: Optional[str], remove: Optional[str]) -> List[st def write_json(path: Path, repository: str, versions: str): org, repo_name = repository.split("/") - struct = [ - dict(version=version, url=f"https://{org}.github.io/{repo_name}/{version}/") - for version in versions - ] + struct = [dict(version=version, url=f"https://{org}.github.io/{repo_name}/{version}/") for version in versions] text = json.dumps(struct, indent=2) print(f"JSON switcher:\n{text}") path.write_text(text) def main(args=None): - parser = ArgumentParser( - description="Make a versions.txt file from gh-pages directories" - ) + parser = ArgumentParser(description="Make a versions.txt file from gh-pages 
directories") parser.add_argument( "--add", help="Add this directory to the list of existing directories", diff --git a/src/ligand_neighbourhood_alignment/align_xmaps.py b/src/ligand_neighbourhood_alignment/align_xmaps.py index 635ff851..4bb4762c 100644 --- a/src/ligand_neighbourhood_alignment/align_xmaps.py +++ b/src/ligand_neighbourhood_alignment/align_xmaps.py @@ -249,10 +249,10 @@ def _get_interpolation_range(neighbourhood: dt.Neighbourhood, transform, referen def interpolate_range( - reference_xmap, - xmap, - interpolation_ranges: list[Block], - transform, + reference_xmap, + xmap, + interpolation_ranges: list[Block], + transform, ): # Make a xmap on reference template new_xmap = gemmi.FloatGrid(reference_xmap.nu, reference_xmap.nv, reference_xmap.nw) @@ -294,26 +294,26 @@ def interpolate_range( logger.debug(f"Block Z Range in output xmap: {rzi} : {rzf}") grid_np[ - rxi:rxf, - ryi:ryf, - rzi:rzf, + rxi:rxf, + ryi:ryf, + rzi:rzf, ] = arr return new_xmap def align_xmap( - neighbourhoods: LigandNeighbourhoods, - g, - transforms: Transforms, - site_transforms: SiteTransforms, - reference_xmap, - subsite_reference_id: LigandID, - site_id: int, - subsite_id: int, - lid: LigandID, - xmap, - output_path: Path, + neighbourhoods: LigandNeighbourhoods, + g, + transforms: Transforms, + site_transforms: SiteTransforms, + reference_xmap, + subsite_reference_id: LigandID, + site_id: int, + subsite_id: int, + lid: LigandID, + xmap, + output_path: Path, ): # Get the ligand neighbourhood neighbourhood: LigandNeighbourhood = neighbourhoods.get_neighbourhood(lid) @@ -392,9 +392,11 @@ def _get_box(neighbourhood: dt.Neighbourhood, xmap, transform): ) return box + def _write_xmap_from_ccp4(ccp4, path): ccp4.write_ccp4_map(str(path)) + def _write_xmap(xmap, path: Path, neighbourhood: dt.Neighbourhood, transform): ccp4 = gemmi.Ccp4Map() @@ -416,7 +418,6 @@ def _write_xmap(xmap, path: Path, neighbourhood: dt.Neighbourhood, transform): ccp4.setup(float("nan")) ccp4.update_ccp4_header() 
- ccp4.write_ccp4_map(str(path)) @@ -441,19 +442,14 @@ def get_frame_bounds(ligand_lower_bound, ligand_upper_bound, border, step): def get_frame_array(frame_lower_bound, frame_upper_bound, step): interval = np.round((frame_upper_bound - frame_lower_bound) / step) - return np.zeros( - (int(interval[0]), int(interval[1]), int(interval[2])), - dtype=np.float32 - ) + return np.zeros((int(interval[0]), int(interval[1]), int(interval[2])), dtype=np.float32) ... def get_frame_transform(frame_lower_bound, frame_array, step): tr = gemmi.Transform() tr.vec.fromlist([x for x in frame_lower_bound]) - tr.mat.fromlist( - (np.eye(3) * step).tolist() - ) + tr.mat.fromlist((np.eye(3) * step).tolist()) return tr ... @@ -461,14 +457,7 @@ def get_frame_transform(frame_lower_bound, frame_array, step): def get_cell(frame_array, step): shape = frame_array.shape - cell = gemmi.UnitCell( - shape[0] * step, - shape[1] * step, - shape[2] * step, - 90.0, - 90.0, - 90.0 - ) + cell = gemmi.UnitCell(shape[0] * step, shape[1] * step, shape[2] * step, 90.0, 90.0, 90.0) return cell @@ -480,7 +469,7 @@ def get_new_map(cell, sample, frame_min, step): ccp4 = gemmi.Ccp4Map() ccp4.grid = grid ccp4.grid.set_unit_cell(cell) - ccp4.grid.spacegroup = gemmi.SpaceGroup('P1') + ccp4.grid.spacegroup = gemmi.SpaceGroup("P1") ccp4.update_ccp4_header() # ccp4.set_header_float(50, frame_min[0]/step) # ccp4.set_header_float(51, frame_min[1]/step) @@ -490,10 +479,8 @@ def get_new_map(cell, sample, frame_min, step): ccp4.set_header_float(52, frame_min[2]) return ccp4 -def resample_xmap( - new_xmap, - aligned_res - ): + +def resample_xmap(new_xmap, aligned_res): step = 0.5 border = 5.0 m = new_xmap @@ -525,20 +512,21 @@ def resample_xmap( new_map = get_new_map(cell, frame_array, frame_lower_bound, step) return new_map + def __align_xmap( - neighbourhood: dt.Neighbourhood, - g, - ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], - reference_xmap, - 
subsite_reference_id: tuple[str,str,str], - lid: tuple[str, str, str], - xmap, - conformer_site_transforms, - conformer_site_id, - # canonical_site_transforms, - canonical_site_id, - output_path: Path, - aligned_res + neighbourhood: dt.Neighbourhood, + g, + ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], + reference_xmap, + subsite_reference_id: tuple[str, str, str], + lid: tuple[str, str, str], + xmap, + conformer_site_transforms, + conformer_site_id, + # canonical_site_transforms, + canonical_site_id, + output_path: Path, + aligned_res, ): # Get the ligand neighbourhood # neighbourhood: LigandNeighbourhood = neighbourhoods.get_neighbourhood(lid) @@ -590,10 +578,7 @@ def __align_xmap( ) # Resample the xmap to the aligned structure frame - resampled_xmap = resample_xmap( - new_xmap, - aligned_res - ) + resampled_xmap = resample_xmap(new_xmap, aligned_res) # Output the xmap # _write_xmap( @@ -606,13 +591,13 @@ def __align_xmap( _write_xmap_from_ccp4( resampled_xmap, output_path, - ) def read_xmap_from_mtz( - mtz_path: Path, - map_type="2Fo-Fc",): + mtz_path: Path, + map_type="2Fo-Fc", +): mtz = gemmi.read_mtz_file(str(mtz_path)) if map_type == "2Fo-Fc": @@ -644,15 +629,15 @@ def read_xmap_from_mtz( def _align_xmaps( - system_data: SystemData, - structures, - canonical_sites: CanonicalSites, - conformer_sites: ConformerSites, - neighbourhoods: LigandNeighbourhoods, - g, - transforms: Transforms, - site_transforms: SiteTransforms, - output: Output, + system_data: SystemData, + structures, + canonical_sites: CanonicalSites, + conformer_sites: ConformerSites, + neighbourhoods: LigandNeighbourhoods, + g, + transforms: Transforms, + site_transforms: SiteTransforms, + output: Output, ): # Get the global reference # reference_lid: LigandID = canonical_sites.reference_site.reference_ligand_id @@ -779,14 +764,14 @@ def _align_xmaps( def _align_xmap( - system_data: SystemData, - canonical_sites: CanonicalSites, - 
conformer_sites: ConformerSites, - neighbourhoods: LigandNeighbourhoods, - g, - transforms: Transforms, - site_transforms: SiteTransforms, - output: Output, + system_data: SystemData, + canonical_sites: CanonicalSites, + conformer_sites: ConformerSites, + neighbourhoods: LigandNeighbourhoods, + g, + transforms: Transforms, + site_transforms: SiteTransforms, + output: Output, ): # Get the global reference # reference_lid: LigandID = canonical_sites.reference_site.reference_ligand_id diff --git a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py index 4f0a23a0..a709d0bf 100644 --- a/src/ligand_neighbourhood_alignment/alignment_heirarchy.py +++ b/src/ligand_neighbourhood_alignment/alignment_heirarchy.py @@ -2,6 +2,7 @@ AlignmentHeirarchy: dict[str, tuple[str, str]] + def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]) -> AlignmentHeirarchy: # The Alignment hierarchy is the graph of alignments one must perform in order to get from # a ligand canonical site to the Reference Assembly Frame @@ -34,14 +35,17 @@ def _derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]) -> Alignment for _assembly_name, _assembly in assemblies.items(): # Get the highest priority chain reference_chain = min( - [_generator.chain for _generator in _assembly.generators], - key= lambda _x: chain_priority[_x] + [_generator.chain for _generator in _assembly.generators], key=lambda _x: chain_priority[_x] ) # Get the highest priority assembly in which it occurs reference_assembly = min( - [_assembly_name for _assembly_name in assembly_chains if reference_chain in assembly_chains[_assembly_name]], - key= lambda _x: assembly_priority[_x] + [ + _assembly_name + for _assembly_name in assembly_chains + if reference_chain in assembly_chains[_assembly_name] + ], + key=lambda _x: assembly_priority[_x], ) reference_assemblies[_assembly_name] = (reference_assembly, reference_chain) @@ -50,6 +54,7 @@ def 
_derive_alignment_heirarchy(assemblies: dict[str, dt.Assembly]) -> Alignment return reference_assemblies + def _chain_to_biochain(chain_name, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly]) -> str: for _xtal_assembly_name, _xtal_assembly in xtalform.assemblies.items(): for _j, _chain_name in enumerate(_xtal_assembly.chains): @@ -57,13 +62,11 @@ def _chain_to_biochain(chain_name, xtalform: dt.XtalForm, assemblies: dict[str, return assemblies[_xtal_assembly.assembly].generators[_j].biomol - StructureLandmarks: dict[tuple[str, str, str], tuple[float, float, float]] + def _calculate_assembly_transform( - assembly_name: str, - alignment_heirarchy: AlignmentHeirarchy, - assembly_landmarks: dict[str, StructureLandmarks] + assembly_name: str, alignment_heirarchy: AlignmentHeirarchy, assembly_landmarks: dict[str, StructureLandmarks] ): # Get the chain to align to - ... \ No newline at end of file + ... diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 7632ce4b..b0c1f257 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -69,8 +69,7 @@ get_subsite_transforms, _update_conformer_site_transforms, _update_canonical_site_transforms, - _update_reference_structure_transforms - + _update_reference_structure_transforms, ) from ligand_neighbourhood_alignment.get_alignability import get_alignability, _update_ligand_neighbourhood_transforms from ligand_neighbourhood_alignment.get_graph import get_graph @@ -109,12 +108,12 @@ def _change_site_reference(_source_dir: Path, site_id: int, subsite_id: int): def _change_subsite_reference( - _source_dir: Path, - site_id: int, - subsite_id: int, - dtag: int, - chain: str, - residue: int, + _source_dir: Path, + site_id: int, + subsite_id: int, + dtag: int, + chain: str, + residue: int, ): sites: CanonicalSites = read_canonical_sites(_source_dir) site = sites.get_site(site_id) @@ -140,11 +139,10 @@ def 
_add_model_building_dir_to_system_data(system_data: SystemData, _data_source ] else: new_datasources = [ - _datasource for _datasource in system_data.datasources if - _datasource.path != str(_data_source_dir) - ] + [ - datasource, - ] + _datasource for _datasource in system_data.datasources if _datasource.path != str(_data_source_dir) + ] + [ + datasource, + ] system_data.datasources = new_datasources return system_data @@ -175,11 +173,10 @@ def _add_manual_dir_to_system_data(system_data: SystemData, _data_source_dir: Pa ] else: new_datasources = [ - _datasource for _datasource in system_data.datasources if - _datasource.path != str(_data_source_dir) - ] + [ - datasource, - ] + _datasource for _datasource in system_data.datasources if _datasource.path != str(_data_source_dir) + ] + [ + datasource, + ] system_data.datasources = new_datasources return system_data @@ -408,10 +405,10 @@ def _get_assigned_xtalforms(system_data, xtalforms): def _assign_xtalforms( - _source_dir: Path, - assemblies: Assemblies, - xtalforms: XtalForms, - system_data: SystemData, + _source_dir: Path, + assemblies: Assemblies, + xtalforms: XtalForms, + system_data: SystemData, ): assigned_xtalforms = _get_assigned_xtalforms(system_data, xtalforms) @@ -489,16 +486,11 @@ def _assign_dataset(dataset, assemblies, xtalforms, structure, structures): def _save_assignments(fs_model: dt.FSModel, dataset_assignments: dict[str, str]): - with open(fs_model.dataset_assignments, 'w') as f: + with open(fs_model.dataset_assignments, "w") as f: yaml.safe_dump(dataset_assignments, f) -def _generate_assembly( - xtalform: dt.XtalForm, - structure, - assemblies: dict[str, dt.Assembly], - pdb -): +def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb): full_st = structure.clone() chains_to_delete = [] for model in full_st: @@ -513,9 +505,9 @@ def _generate_assembly( # chains = xtalform_assembly.chains # reference = assembly.reference for _biogen, _chain, _transform in 
zip( - assembly.generators, - xtalform_assembly.chains, - xtalform_assembly.transforms, + assembly.generators, + xtalform_assembly.chains, + xtalform_assembly.transforms, ): # for generator in assembly.generators: @@ -526,12 +518,12 @@ def _generate_assembly( chain_clone = structure[0][_chain].clone() except Exception as e: raise Exception( - f'An Exception occurred in generating the biological assemblies for\n' - f'{pdb}\n' - f'Based on the assembly, the expected chains were: {xtalform_assembly.chains}\n' - f'However the chains in the structure were: {[_x.name for _x in structure[0]]}\n' - 'XCA does not currently handle datasets with a mis-match between the xtalform chains.\n' - 'You should ensure that the chain names are consistent with the reference dataset for the xtalforms.' + f"An Exception occurred in generating the biological assemblies for\n" + f"{pdb}\n" + f"Based on the assembly, the expected chains were: {xtalform_assembly.chains}\n" + f"However the chains in the structure were: {[_x.name for _x in structure[0]]}\n" + "XCA does not currently handle datasets with a mis-match between the xtalform chains.\n" + "You should ensure that the chain names are consistent with the reference dataset for the xtalforms." 
) for residue in chain_clone: @@ -573,8 +565,11 @@ def _get_structure_fragments(dataset: dt.Dataset, structure, version): # ligand_id = (dataset.dtag, str(chain.name), str(lbe.residue),) # fragments[ligand_id] = residue # lig_number = lig_number + 1 - if (str(lbe[2]) == str(residue.seqid.num)) & (str(lbe[1]) == str(source_chain)) & ( - transform == "x,y,z"): + if ( + (str(lbe[2]) == str(residue.seqid.num)) + & (str(lbe[1]) == str(source_chain)) + & (transform == "x,y,z") + ): ligand_id = (dataset.dtag, str(lbe[1]), str(lbe[2]), str(version)) fragments[ligand_id] = residue @@ -585,11 +580,7 @@ def _get_structure_fragments(dataset: dt.Dataset, structure, version): def _get_dataset_neighbourhoods( - dataset: dt.Dataset, - xtalform: dt.XtalForm, - assemblies: dict[str, dt.Assembly], - version, - max_radius: float = 9.0 + dataset: dt.Dataset, xtalform: dt.XtalForm, assemblies: dict[str, dt.Assembly], version, max_radius: float = 9.0 ) -> dict[tuple[str, str, str, str], dt.Neighbourhood]: # Load the structure logger.debug(dataset.pdb) @@ -600,11 +591,7 @@ def _get_dataset_neighbourhoods( assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb) # Get the bound fragments - fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments( - dataset, - assembly, - version - ) + fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments(dataset, assembly, version) logger.debug(f"Get {len(fragments)} fragment neighbourhoods") logger.debug(fragments) @@ -630,10 +617,10 @@ def _get_dataset_neighbourhoods( def _get_neighbourhoods( - dataset: dt.Dataset, - xtalform: dt.XtalForm, - assemblies: dict[str, dt.Assembly], - version, + dataset: dt.Dataset, + xtalform: dt.XtalForm, + assemblies: dict[str, dt.Assembly], + version, ): dataset_ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood] = _get_dataset_neighbourhoods( dataset, xtalform, assemblies, version @@ -642,10 +629,10 @@ def 
_get_neighbourhoods( def _save_neighbourhoods( - fs_model: dt.FSModel, - ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + fs_model: dt.FSModel, + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], ): - with open(fs_model.ligand_neighbourhoods, 'w') as f: + with open(fs_model.ligand_neighbourhoods, "w") as f: dic = {} for ligand_id, neighbourhood in ligand_neighbourhoods.items(): dic["/".join(ligand_id)] = neighbourhood.to_dict() @@ -653,23 +640,18 @@ def _save_neighbourhoods( def _save_ligand_neighbourhood_transforms(fs_model, ligand_neighbourhood_transforms): - with open(fs_model.ligand_neighbourhood_transforms, 'w') as f: + with open(fs_model.ligand_neighbourhood_transforms, "w") as f: dic = {} for (to_ligand_id, from_ligand_id), transform in ligand_neighbourhood_transforms.items(): - key = "~".join( - [ - "/".join(to_ligand_id), - "/".join(from_ligand_id) - ] - ) + key = "~".join(["/".join(to_ligand_id), "/".join(from_ligand_id)]) dic[key] = transform.to_dict() yaml.safe_dump(dic, f) def _update_graph( - alignability_graph, - ligand_neighbourhoods, - ligand_neighbourhood_transforms, + alignability_graph, + ligand_neighbourhoods, + ligand_neighbourhood_transforms, ): nodes = alignability_graph.nodes edges = alignability_graph.edges @@ -691,14 +673,7 @@ def _update_graph( def _save_graph(fs_model, alignability_graph): - graph_for_output = nx.relabel_nodes( - alignability_graph, - { - x: "/".join(x) - for x - in alignability_graph - } - ) + graph_for_output = nx.relabel_nodes(alignability_graph, {x: "/".join(x) for x in alignability_graph}) nx.write_gml( graph_for_output, str(fs_model.alignability_graph), @@ -710,11 +685,8 @@ def _save_graph(fs_model, alignability_graph): # cliques = list(nx.connected_components(alignability_graph)) # return cliques -def _get_connected_components( - alignability_graph, - clusters, - max_path_length=2 -): + +def _get_connected_components(alignability_graph, clusters, 
max_path_length=2): """ Construct neighbourhoods around the most connected neighbourhoods by some max path length, @@ -729,10 +701,7 @@ def _get_connected_components( H.add_node(node) for source, target in path_lengths: if path_lengths[(source, target)] <= max_path_length: - H.add_edge( - source, - target - ) + H.add_edge(source, target) # degrees = dict(nx.degree(H)) @@ -754,7 +723,7 @@ def _get_connected_components( if x in used: continue clusters[x] = [] - print(f'f{x} : {degrees[x]}') + print(f"f{x} : {degrees[x]}") # for n in G.neighbors(x): # used.append(n) @@ -771,11 +740,11 @@ def _get_connected_components( def _update_conformer_sites( - conformer_sites: dict[str, dt.ConformerSite], - connected_component_id: tuple[str, str, str, str], - connected_component: list[tuple[str, str, str, str]], - neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], - structures + conformer_sites: dict[str, dt.ConformerSite], + connected_component_id: tuple[str, str, str, str], + connected_component: list[tuple[str, str, str, str]], + neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + structures, ): matched = False # Check each old conformer site for overlap in membership, and if so update its members @@ -799,26 +768,22 @@ def _update_conformer_sites( [x for x in set(residues)], connected_component, # [x for x in connected_component][0] - connected_component_id + connected_component_id, ) conformer_site_id = "+".join(conformer_site.reference_ligand_id) conformer_sites[conformer_site_id] = conformer_site def _save_connected_components(fs_model, connected_components): - with open(fs_model.connected_components, 'w') as f: + with open(fs_model.connected_components, "w") as f: dic = {} for connected_component_reference, connected_component in connected_components.items(): - dic["+".join(connected_component_reference)] = [ - "+".join(member) - for member - in connected_component - ] + dic["+".join(connected_component_reference)] = ["+".join(member) for 
member in connected_component] yaml.safe_dump(dic, f, sort_keys=False) def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.ConformerSite]): - with open(fs_model.conformer_sites, 'w') as f: + with open(fs_model.conformer_sites, "w") as f: dic = {} for conformer_site_id, conformer_site in conformer_sites.items(): dic[conformer_site_id] = conformer_site.to_dict() @@ -826,41 +791,29 @@ def _save_conformer_sites(fs_model: dt.FSModel, conformer_sites: dict[str, dt.Co def _get_centroid_res( - residues: list[tuple[str, str]], - reference_neighbourhood: dt.Neighbourhood, + residues: list[tuple[str, str]], + reference_neighbourhood: dt.Neighbourhood, ): res_cas = {} for _residue_id in residues: - for _atom_id, _atom in reference_neighbourhood.atoms.items(): - if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == 'CA'): - res_cas[_atom_id] = _atom + for _atom_id, _atom in reference_neighbourhood.atoms.items(): + if (_atom_id[0] == _residue_id[0]) & (_atom_id[1] == _residue_id[1]) & (_atom_id[2] == "CA"): + res_cas[_atom_id] = _atom id_arr = [_atom_id for _atom_id in res_cas] - arr = np.array( - [ - [_atom.x, _atom.y, _atom.z] - for _atom - in res_cas.values() - ] - ) + arr = np.array([[_atom.x, _atom.y, _atom.z] for _atom in res_cas.values()]) centroid = np.mean(arr, axis=0) - closest = np.argmin( - np.linalg.norm( - arr-centroid, - axis=1 - ) - ) + closest = np.argmin(np.linalg.norm(arr - centroid, axis=1)) closest_atom_id = id_arr[closest] return (closest_atom_id[0], closest_atom_id[1]) - def _update_canonical_sites( - canonical_sites: dict[str, dt.CanonicalSite], - conformer_site: dt.ConformerSite, - conformer_site_id, - neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], - min_shared_residues=6 + canonical_sites: dict[str, dt.CanonicalSite], + conformer_site: dt.ConformerSite, + conformer_site_id, + neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + min_shared_residues=6, ): if 
len(canonical_sites) != 0: global_reference_dtag = [x for x in canonical_sites.values()][0].global_reference_dtag @@ -890,12 +843,11 @@ def _update_canonical_sites( # If not matched to any existing canonical site create a new one if not matched: - centroid_res = _get_centroid_res( - conformer_site.residues, - neighbourhoods[conformer_site.reference_ligand_id] - ) + centroid_res = _get_centroid_res(conformer_site.residues, neighbourhoods[conformer_site.reference_ligand_id]) canonical_site = dt.CanonicalSite( - [conformer_site_id, ], + [ + conformer_site_id, + ], conformer_site.residues, conformer_site_id, global_reference_dtag, @@ -903,17 +855,16 @@ def _update_canonical_sites( conformer_site.reference_ligand_id[0], centroid_res[0], centroid_res[1], - conformer_site.reference_ligand_id[1] - ) + conformer_site.reference_ligand_id[1], + ), ) - canonical_site_id = conformer_site_id canonical_sites[canonical_site_id] = canonical_site def _save_canonical_sites(fs_model, canonical_sites: dict[str, dt.CanonicalSite]): - with open(fs_model.canonical_sites, 'w') as f: + with open(fs_model.canonical_sites, "w") as f: dic = {} for canonical_site_id, canonical_site in canonical_sites.items(): dic[canonical_site_id] = canonical_site.to_dict() @@ -921,11 +872,11 @@ def _save_canonical_sites(fs_model, canonical_sites: dict[str, dt.CanonicalSite] def _update_xtalform_sites( - xtalform_sites: dict[str, dt.XtalFormSite], - canonical_site: dt.CanonicalSite, - canonical_site_id: str, - dataset_assignments: dict[str, str], - conformer_sites: dict[str, dt.ConformerSite] + xtalform_sites: dict[str, dt.XtalFormSite], + canonical_site: dt.CanonicalSite, + canonical_site_id: str, + dataset_assignments: dict[str, str], + conformer_sites: dict[str, dt.ConformerSite], ): matched = False # for xtalform_site_id, xtalform_site in xtalform_sites.items(): @@ -939,8 +890,7 @@ def _update_xtalform_sites( xtalforms_dict = { (xtalform_site.canonical_site_id, xtalform_site.xtalform_id): xtalform_site_id 
- for xtalform_site_id, xtalform_site - in xtalform_sites.items() + for xtalform_site_id, xtalform_site in xtalform_sites.items() } for conformer_site_id in canonical_site.conformer_site_ids: @@ -957,7 +907,9 @@ def _update_xtalform_sites( assignment, member[1], canonical_site_id, - [member, ], + [ + member, + ], ) xtalform_sites[xtalform_site_id] = xtalform_site xtalforms_dict[(xtalform_site.canonical_site_id, xtalform_site.xtalform_id)] = xtalform_site_id @@ -967,7 +919,7 @@ def _update_xtalform_sites( def _save_xtalform_sites(fs_model, xtalform_sites: dict[str, dt.XtalFormSite]): - with open(fs_model.xtalform_sites, 'w') as f: + with open(fs_model.xtalform_sites, "w") as f: dic = {} for xtalform_site_id, xtalform_site in xtalform_sites.items(): dic[xtalform_site_id] = xtalform_site.to_dict() @@ -983,9 +935,11 @@ def _save_xtalform_sites(fs_model, xtalform_sites: dict[str, dt.XtalFormSite]): # # ... -def _save_conformer_site_transforms(fs_model: dt.FSModel, - conformer_site_transforms: dict[tuple[str, str], dt.Transform]): - with open(fs_model.conformer_site_transforms, 'w') as f: + +def _save_conformer_site_transforms( + fs_model: dt.FSModel, conformer_site_transforms: dict[tuple[str, str], dt.Transform] +): + with open(fs_model.conformer_site_transforms, "w") as f: dic = {} for conformer_site_transform_id, conformer_site_transform in conformer_site_transforms.items(): dic["~".join(conformer_site_transform_id)] = conformer_site_transform.to_dict() @@ -1001,8 +955,9 @@ def _save_conformer_site_transforms(fs_model: dt.FSModel, # ... 
+ def _save_canonical_site_transforms(fs_model: dt.FSModel, canonical_site_transforms: dict[str, dt.Transform]): - with open(fs_model.canonical_site_transforms, 'w') as f: + with open(fs_model.canonical_site_transforms, "w") as f: dic = {} for canonical_site_transform_id, canonical_site_transform in canonical_site_transforms.items(): dic[canonical_site_transform_id] = canonical_site_transform.to_dict() @@ -1010,10 +965,10 @@ def _save_canonical_site_transforms(fs_model: dt.FSModel, canonical_site_transfo def _update_fs_model( - fs_model: dt.FSModel, - canonical_sites: dict[str, dt.CanonicalSite], - conformer_sites: dict[str, dt.ConformerSite], - reference_datasets: dict[str, dt.Dataset] + fs_model: dt.FSModel, + canonical_sites: dict[str, dt.CanonicalSite], + conformer_sites: dict[str, dt.ConformerSite], + reference_datasets: dict[str, dt.Dataset], ): # Iterate over canonical sites and their members, checking if they already have an output record and # if not creating one @@ -1039,35 +994,48 @@ def _update_fs_model( if canonical_site_id not in ligand_neighbourhood_output.aligned_structures: ligand_neighbourhood_output.aligned_structures[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id - ) + fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_STRUCTURE_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id + ) ) ligand_neighbourhood_output.aligned_artefacts[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, version=version,site=canonical_site_id - ) + fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( + dtag=dtag, chain=chain, 
residue=residue, version=version, site=canonical_site_id + ) ) ligand_neighbourhood_output.aligned_xmaps[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_XMAP_TEMPLATE.format( - dtag=dtag, chain=chain, - residue=residue,version=version, - site=canonical_site_id) + fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_XMAP_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id + ) ) ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_DIFF_TEMPLATE.format( - dtag=dtag, chain=chain, - residue=residue, version=version, - site=canonical_site_id) + fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_DIFF_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id + ) ) ligand_neighbourhood_output.aligned_event_maps[canonical_site_id] = ( - fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id - ) + fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, version=version, site=canonical_site_id + ) ) reference_alignments = fs_model.reference_alignments @@ -1082,16 +1050,20 @@ def _update_fs_model( if canonical_site_id not in reference_alignments[dtag]: reference_alignments[dtag][canonical_site_id] = { - 'aligned_structures': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_STRUCTURE_TEMPLATE.format( + "aligned_structures": fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_REFERENCE_STRUCTURE_TEMPLATE.format(dtag=dtag, site=canonical_site_id), + "aligned_artefacts": fs_model.source_dir + / 
constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_REFERENCE_STRUCTURE_ARTEFACTS_TEMPLATE.format( dtag=dtag, site=canonical_site_id ), - 'aligned_artefacts': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, site=canonical_site_id - ), - - 'aligned_xmaps': fs_model.source_dir / constants.ALIGNED_FILES_DIR / dtag / constants.ALIGNED_REFERENCE_XMAP_TEMPLATE.format( - dtag=dtag, - site=canonical_site_id), + "aligned_xmaps": fs_model.source_dir + / constants.ALIGNED_FILES_DIR + / dtag + / constants.ALIGNED_REFERENCE_XMAP_TEMPLATE.format(dtag=dtag, site=canonical_site_id), # 'aligned_event_maps': fs_model.source_dir / constants.ALIGNED_EVENT_MAP_TEMPLATE.format( # dtag=dtag, chain=chain, residue=residue, site=canonical_site_id # ), @@ -1099,44 +1071,43 @@ def _update_fs_model( def _save_fs_model(fs_model: dt.FSModel): - with open(fs_model.fs_model, 'w') as f: + with open(fs_model.fs_model, "w") as f: dic = fs_model.to_dict() yaml.safe_dump(dic, f) def save_reference_structure_transforms( - fs_model: dt.FSModel, - reference_structure_transforms: dict[tuple[str, str], dt.Transform], + fs_model: dt.FSModel, + reference_structure_transforms: dict[tuple[str, str], dt.Transform], ): dic = {} for reference_structure_transform_id, reference_structure_transform in reference_structure_transforms.items(): dic["~".join(reference_structure_transform_id)] = reference_structure_transform.to_dict() - with open(fs_model.reference_structure_transforms, 'w') as f: + with open(fs_model.reference_structure_transforms, "w") as f: yaml.safe_dump(dic, f) def _update( - fs_model: dt.FSModel, - datasets: dict[str, dt.Dataset], - reference_datasets: dict[str, dt.Dataset], - new_datasets: dict[str, dt.Dataset], - assemblies: dict[str, dt.Assembly], - xtalforms: dict[str, dt.XtalForm], - dataset_assignments: dict[str, str], - ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], - # 
alignment_landmarks: dict[tuple[str,str,str,int], dict[tuple[str, str, str], dt.Atom]], - alignability_graph, - connected_components, - ligand_neighbourhood_transforms: dict[ - tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], - conformer_sites: dict[str, dt.ConformerSite], - conformer_site_transforms: dict[tuple[str, str], dt.Transform], - canonical_sites: dict[str, dt.CanonicalSite], - # canonical_site_transforms: dict[str, dt.Transform], - xtalform_sites: dict[str, dt.XtalFormSite], - reference_structure_transforms: dict[tuple[str, str], dt.Transform], - version + fs_model: dt.FSModel, + datasets: dict[str, dt.Dataset], + reference_datasets: dict[str, dt.Dataset], + new_datasets: dict[str, dt.Dataset], + assemblies: dict[str, dt.Assembly], + xtalforms: dict[str, dt.XtalForm], + dataset_assignments: dict[str, str], + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + # alignment_landmarks: dict[tuple[str,str,str,int], dict[tuple[str, str, str], dt.Atom]], + alignability_graph, + connected_components, + ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], + conformer_sites: dict[str, dt.ConformerSite], + conformer_site_transforms: dict[tuple[str, str], dt.Transform], + canonical_sites: dict[str, dt.CanonicalSite], + # canonical_site_transforms: dict[str, dt.Transform], + xtalform_sites: dict[str, dt.XtalFormSite], + reference_structure_transforms: dict[tuple[str, str], dt.Transform], + version, ): logger.info(f"Version is: {version}") # Get the structures @@ -1319,7 +1290,10 @@ def _update( for chain, chain_alignment_info in dataset_alignment_info.items(): for residue, residue_alignment_info in chain_alignment_info.items(): for version, ligand_neighbourhood_output in residue_alignment_info.items(): - for canonical_site_id, aligned_structure_path in ligand_neighbourhood_output.aligned_structures.items(): + for ( + canonical_site_id, + 
aligned_structure_path, + ) in ligand_neighbourhood_output.aligned_structures.items(): if not Path(aligned_structure_path).exists(): # _update_aligned_structures() _structure = structures[dtag].clone() @@ -1361,7 +1335,7 @@ def _update( # ) for dtag, dataset_alignment_info in fs_model.reference_alignments.items(): for canonical_site_id, alignment_info in dataset_alignment_info.items(): - aligned_structure_path = alignment_info['aligned_structures'] + aligned_structure_path = alignment_info["aligned_structures"] logger.info(f"Outputting reference structure: {aligned_structure_path}") if not Path(aligned_structure_path).exists(): _structure = structures[dtag].clone() @@ -1371,7 +1345,7 @@ def _update( reference_structure_transforms, # canonical_site_transforms, canonical_site_id, - alignment_info['aligned_structures'], + alignment_info["aligned_structures"], ) else: logger.info(f"Already output reference structure!") @@ -1382,19 +1356,16 @@ def _update( # conformer_site = conformer_sites[conformer_site_id] # for lid in conformer_site.members: # _update_aligned_xmaps() - reference_xmap = read_xmap_from_mtz( - datasets[ - [ - x for x in canonical_sites.values() - ][0].global_reference_dtag - ].mtz - ) + reference_xmap = read_xmap_from_mtz(datasets[[x for x in canonical_sites.values()][0].global_reference_dtag].mtz) logger.info(f"Outputting xmaps...") for dtag, dataset_alignment_info in fs_model.alignments.items(): for chain, chain_alignment_info in dataset_alignment_info.items(): for residue, residue_alignment_info in chain_alignment_info.items(): for version, ligand_neighbourhood_output in residue_alignment_info.items(): - for canonical_site_id, aligned_event_map_path in ligand_neighbourhood_output.aligned_event_maps.items(): + for ( + canonical_site_id, + aligned_event_map_path, + ) in ligand_neighbourhood_output.aligned_event_maps.items(): logger.info(f"Writing to: {aligned_event_map_path}") if not Path(aligned_event_map_path).exists(): _structure = 
structures[dtag].clone() @@ -1439,7 +1410,7 @@ def _update( # canonical_site_transforms, canonical_site_id, aligned_event_map_path, - aligned_res + aligned_res, ) mtz_path = datasets[dtag].mtz # print(f"Mtz path: {mtz_path}") @@ -1459,7 +1430,7 @@ def _update( # canonical_site_transforms, canonical_site_id, ligand_neighbourhood_output.aligned_xmaps[canonical_site_id], - aligned_res + aligned_res, ) xmap = read_xmap_from_mtz(mtz_path, "Fo-Fc") __align_xmap( @@ -1475,7 +1446,7 @@ def _update( # canonical_site_transforms, canonical_site_id, ligand_neighbourhood_output.aligned_diff_maps[canonical_site_id], - aligned_res + aligned_res, ) else: @@ -1488,16 +1459,16 @@ def _load_assemblies(assemblies_file, new_assemblies_yaml): if assemblies_file.exists(): - with open(assemblies_file, 'r') as f: + with open(assemblies_file, "r") as f: dic = yaml.safe_load(f) - for assembly_id, assembly_info in dic['assemblies'].items(): + for assembly_id, assembly_info in dic["assemblies"].items(): assemblies[assembly_id] = dt.Assembly.from_dict(assembly_info) # Load new info and update if new_assemblies_yaml.exists(): - with open(new_assemblies_yaml, 'r') as f: - new_assemblies_dict = yaml.safe_load(f)['assemblies'] + with open(new_assemblies_yaml, "r") as f: + new_assemblies_dict = yaml.safe_load(f)["assemblies"] else: new_assemblies_dict = {} @@ -1514,16 +1485,16 @@ def _load_xtalforms(xtalforms_file, new_xtalforms_yaml): if xtalforms_file.exists(): - with open(xtalforms_file, 'r') as f: - dic = yaml.safe_load(f)['crystalforms'] + with open(xtalforms_file, "r") as f: + dic = yaml.safe_load(f)["crystalforms"] for xtalform_id, xtalform_info in dic.items(): xtalforms[xtalform_id] = dt.XtalForm.from_dict(xtalform_info) # Load new info and update if new_xtalforms_yaml.exists(): - with open(new_xtalforms_yaml, 'r') as f: - new_xtalforms_dict = yaml.safe_load(f)['crystalforms'] + with open(new_xtalforms_yaml, "r") as f: + new_xtalforms_dict = yaml.safe_load(f)["crystalforms"] else: 
new_xtalforms_dict = {} @@ -1541,7 +1512,7 @@ def _load_xtalforms_and_assemblies(xtalforms_file, new_xtalforms_yaml): if xtalforms_file.exists(): - with open(xtalforms_file, 'r') as f: + with open(xtalforms_file, "r") as f: dic = yaml.safe_load(f) for xtalform_id, xtalform_info in dic.items(): @@ -1549,7 +1520,7 @@ def _load_xtalforms_and_assemblies(xtalforms_file, new_xtalforms_yaml): # Load new info and update if new_xtalforms_yaml.exists(): - with open(new_xtalforms_yaml, 'r') as f: + with open(new_xtalforms_yaml, "r") as f: new_xtalforms_dict = yaml.safe_load(f) else: new_xtalforms_dict = {} @@ -1566,7 +1537,7 @@ def _load_dataset_assignments(dataset_assignments_yaml): dataset_assignments = {} if dataset_assignments_yaml.exists(): - with open(dataset_assignments_yaml, 'r') as f: + with open(dataset_assignments_yaml, "r") as f: dic = yaml.safe_load(f) for dtag, assignment in dic.items(): @@ -1579,7 +1550,7 @@ def _load_ligand_neighbourhoods(ligand_neighbourhoods_yaml): if ligand_neighbourhoods_yaml.exists(): - with open(ligand_neighbourhoods_yaml, 'r') as f: + with open(ligand_neighbourhoods_yaml, "r") as f: dic = yaml.safe_load(f) if dic: @@ -1598,15 +1569,7 @@ def _load_alignability_graph(alignability_graph): # destringizer=lambda x: tuple(x.split("/")), ) - g = nx.relabel_nodes( - g_initial, - { - x: tuple(x.split("/")) - for x - in g_initial - } - - ) + g = nx.relabel_nodes(g_initial, {x: tuple(x.split("/")) for x in g_initial}) return g @@ -1619,7 +1582,7 @@ def _load_connected_components(connected_components_yaml): if connected_components_yaml.exists(): - with open(connected_components_yaml, 'r') as f: + with open(connected_components_yaml, "r") as f: dic = yaml.safe_load(f) if dic: @@ -1627,9 +1590,7 @@ def _load_connected_components(connected_components_yaml): dtag, chain, residue, version = ligand_id.split("+") # neighbourhood = dt.Neighbourhood.from_dict(neighbourhood_info) connected_components[(dtag, chain, residue, version)] = [ - tuple([x for x in 
_ligand_id.split("+")]) - for _ligand_id - in neighbourhood_info + tuple([x for x in _ligand_id.split("+")]) for _ligand_id in neighbourhood_info ] return connected_components @@ -1639,7 +1600,7 @@ def _load_ligand_neighbourhood_transforms(ligand_neighbourhood_transforms_yaml): ligand_neighbourhood_transforms = {} if ligand_neighbourhood_transforms_yaml.exists(): - with open(ligand_neighbourhood_transforms_yaml, 'r') as f: + with open(ligand_neighbourhood_transforms_yaml, "r") as f: dic = yaml.safe_load(f) for ligand_transform_key, ligand_transform in dic.items(): @@ -1647,10 +1608,9 @@ def _load_ligand_neighbourhood_transforms(ligand_neighbourhood_transforms_yaml): ligand_1_id, ligand_2_id = ligand_transform_key.split("~") dtag_1, chain_1, residue_1, version = ligand_1_id.split("/") dtag_2, chain_2, residue_2, version = ligand_2_id.split("/") - ligand_neighbourhood_transforms[( - (dtag_1, chain_1, residue_1, version), - (dtag_2, chain_2, residue_2, version) - )] = dt.Transform.from_dict(ligand_transform) + ligand_neighbourhood_transforms[ + ((dtag_1, chain_1, residue_1, version), (dtag_2, chain_2, residue_2, version)) + ] = dt.Transform.from_dict(ligand_transform) return ligand_neighbourhood_transforms @@ -1658,7 +1618,7 @@ def _load_ligand_neighbourhood_transforms(ligand_neighbourhood_transforms_yaml): def _load_conformer_sites(conformer_sites_yaml): conformer_sites = {} if conformer_sites_yaml.exists(): - with open(conformer_sites_yaml, 'r') as f: + with open(conformer_sites_yaml, "r") as f: dic = yaml.safe_load(f) for conformer_site_id, conformer_site_info in dic.items(): conformer_sites[conformer_site_id] = dt.ConformerSite.from_dict(conformer_site_info) @@ -1669,14 +1629,15 @@ def _load_conformer_sites(conformer_sites_yaml): def _load_conformer_site_transforms(conformer_site_transforms_yaml): conformer_site_transforms = {} if conformer_site_transforms_yaml.exists(): - with open(conformer_site_transforms_yaml, 'r') as f: + with 
open(conformer_site_transforms_yaml, "r") as f: dic = yaml.safe_load(f) for conformer_site_transform_id, conformer_site_transform_info in dic.items(): conformer_site_1, conformer_site_2 = conformer_site_transform_id.split("~") conformer_site_transforms[(conformer_site_1, conformer_site_2)] = dt.Transform.from_dict( - conformer_site_transform_info) + conformer_site_transform_info + ) return conformer_site_transforms @@ -1684,7 +1645,7 @@ def _load_conformer_site_transforms(conformer_site_transforms_yaml): def _load_canonical_sites(canonical_sites_yaml): canonical_sites = {} if canonical_sites_yaml.exists(): - with open(canonical_sites_yaml, 'r') as f: + with open(canonical_sites_yaml, "r") as f: dic = yaml.safe_load(f) if dic is not None: @@ -1697,7 +1658,7 @@ def _load_canonical_sites(canonical_sites_yaml): def _load_canonical_site_transforms(canonical_site_transforms_yaml): canonical_site_transforms = {} if canonical_site_transforms_yaml.exists(): - with open(canonical_site_transforms_yaml, 'r') as f: + with open(canonical_site_transforms_yaml, "r") as f: dic = yaml.safe_load(f) for canonical_site_id, canonical_site_transform_info in dic.items(): @@ -1709,7 +1670,7 @@ def _load_canonical_site_transforms(canonical_site_transforms_yaml): def _load_xtalform_sites(xtalform_sites_yaml): xtalform_sites = {} if xtalform_sites_yaml.exists(): - with open(xtalform_sites_yaml, 'r') as f: + with open(xtalform_sites_yaml, "r") as f: dic = yaml.safe_load(f) for xtalform_site_id, xtalform_site_info in dic.items(): @@ -1721,13 +1682,14 @@ def _load_xtalform_sites(xtalform_sites_yaml): def _load_reference_stucture_transforms(reference_structure_transforms_yaml): reference_structure_transforms = {} if reference_structure_transforms_yaml.exists(): - with open(reference_structure_transforms_yaml, 'r') as f: + with open(reference_structure_transforms_yaml, "r") as f: dic = yaml.safe_load(f) for reference_structure_transform_id, reference_structure_transform_info in dic.items(): dtag, 
canonical_site_id = reference_structure_transform_id.split("~") reference_structure_transforms[(dtag, canonical_site_id)] = dt.Transform.from_dict( - reference_structure_transform_info) + reference_structure_transform_info + ) return reference_structure_transforms @@ -1762,7 +1724,9 @@ def update(self, options_json: str): # else: # source_fs_model = dt.FSModel.default() - fs_model = dt.FSModel.from_dir(options.output_dir, ) + fs_model = dt.FSModel.from_dir( + options.output_dir, + ) if source_fs_model: fs_model.alignments = source_fs_model.alignments fs_model.reference_alignments = source_fs_model.reference_alignments @@ -1782,10 +1746,7 @@ def update(self, options_json: str): fs_model.symlink_old_data() source_data_model = dt.SourceDataModel.from_fs_model( - fs_model, - options.datasources, - options.datasource_types, - options.panddas + fs_model, options.datasources, options.datasource_types, options.panddas ) datasets, reference_datasets, new_datasets = source_data_model.get_datasets() @@ -1796,8 +1757,9 @@ def update(self, options_json: str): # Get assemblies logger.info(f"Getting assemblies...") if source_fs_model: - assemblies: dict[str, dt.Assembly] = _load_assemblies(source_fs_model.assemblies, - Path(options.assemblies_json)) + assemblies: dict[str, dt.Assembly] = _load_assemblies( + source_fs_model.assemblies, Path(options.assemblies_json) + ) else: assemblies = _load_assemblies(fs_model.assemblies, Path(options.assemblies_json)) # for key, assembly in assemblies.items(): @@ -1809,7 +1771,9 @@ def update(self, options_json: str): # Get xtalforms logger.info(f"Getting xtalforms...") if source_fs_model: - xtalforms: dict[str, dt.XtalForm] = _load_xtalforms(source_fs_model.xtalforms, Path(options.xtalforms_json)) + xtalforms: dict[str, dt.XtalForm] = _load_xtalforms( + source_fs_model.xtalforms, Path(options.xtalforms_json) + ) else: xtalforms = _load_xtalforms(fs_model.xtalforms, Path(options.xtalforms_json)) # for key, xtalform in xtalforms.items(): @@ 
-1831,10 +1795,10 @@ def update(self, options_json: str): logger.info(f"Getting ligand neighbourhoods...") if source_fs_model: ligand_neighbourhoods: dict[tuple[str, str, str], dt.Neighbourhood] = _load_ligand_neighbourhoods( - source_fs_model.ligand_neighbourhoods) + source_fs_model.ligand_neighbourhoods + ) else: - ligand_neighbourhoods = _load_ligand_neighbourhoods( - fs_model.ligand_neighbourhoods) + ligand_neighbourhoods = _load_ligand_neighbourhoods(fs_model.ligand_neighbourhoods) print(ligand_neighbourhoods) # Get alignability graph @@ -1853,12 +1817,12 @@ def update(self, options_json: str): logger.info(f"Getting lighand neighbourhood transforms...") if source_fs_model: ligand_neighbourhood_transforms: dict[ - tuple[ - tuple[str, str, str], tuple[str, str, str]], dt.Transform] = _load_ligand_neighbourhood_transforms( - source_fs_model.ligand_neighbourhood_transforms) + tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform + ] = _load_ligand_neighbourhood_transforms(source_fs_model.ligand_neighbourhood_transforms) else: ligand_neighbourhood_transforms = _load_ligand_neighbourhood_transforms( - fs_model.ligand_neighbourhood_transforms) + fs_model.ligand_neighbourhood_transforms + ) # Get conformer sites logger.info(f"Getting conformer sites...") @@ -1871,10 +1835,10 @@ def update(self, options_json: str): logger.info(f"Getting conformer site transforms...") if source_fs_model: conformer_site_transforms: dict[tuple[str, str], dt.Transform] = _load_conformer_site_transforms( - source_fs_model.conformer_site_transforms) + source_fs_model.conformer_site_transforms + ) else: - conformer_site_transforms = _load_conformer_site_transforms( - fs_model.conformer_site_transforms) + conformer_site_transforms = _load_conformer_site_transforms(fs_model.conformer_site_transforms) # Get canonical sites logger.info(f"Getting canonical sites...") @@ -1903,10 +1867,12 @@ def update(self, options_json: str): logger.info(f"Getting reference structure transforms...") if 
source_fs_model: reference_structure_transforms: dict[tuple[str, str], dt.Transform] = _load_reference_stucture_transforms( - source_fs_model.reference_structure_transforms) + source_fs_model.reference_structure_transforms + ) else: reference_structure_transforms = _load_reference_stucture_transforms( - fs_model.reference_structure_transforms) + fs_model.reference_structure_transforms + ) # Run the update _update( @@ -1926,7 +1892,7 @@ def update(self, options_json: str): canonical_sites, # canonical_site_transforms, xtalform_sites, - reference_structure_transforms + reference_structure_transforms, ) def process_all(self, option_json: str): @@ -2068,33 +2034,33 @@ def process_all(self, option_json: str): continue chain_output.aligned_ligands[residue].aligned_structures[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_STRUCTURE_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + constants.ALIGNED_STRUCTURE_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) chain_output.aligned_ligands[residue].aligned_artefacts[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) chain_output.aligned_ligands[residue].aligned_xmaps[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_XMAP_TEMPLATE.format(dtag=dtag, chain=chain, residue=residue, site=site_id) + output.aligned_dir + + "/" + + constants.ALIGNED_XMAP_TEMPLATE.format(dtag=dtag, chain=chain, residue=residue, site=site_id) ) chain_output.aligned_ligands[residue].aligned_event_maps[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_EVENT_MAP_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + 
constants.ALIGNED_EVENT_MAP_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) # Save the output file @@ -2212,33 +2178,33 @@ def process(self, option_json: str): continue chain_output.aligned_ligands[residue].aligned_structures[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_STRUCTURE_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + constants.ALIGNED_STRUCTURE_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) chain_output.aligned_ligands[residue].aligned_artefacts[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + constants.ALIGNED_STRUCTURE_ARTEFACTS_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) chain_output.aligned_ligands[residue].aligned_xmaps[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_XMAP_TEMPLATE.format(dtag=dtag, chain=chain, residue=residue, site=site_id) + output.aligned_dir + + "/" + + constants.ALIGNED_XMAP_TEMPLATE.format(dtag=dtag, chain=chain, residue=residue, site=site_id) ) chain_output.aligned_ligands[residue].aligned_event_maps[site_id] = ( - output.aligned_dir - + "/" - + constants.ALIGNED_EVENT_MAP_TEMPLATE.format( - dtag=dtag, chain=chain, residue=residue, site=site_id - ) + output.aligned_dir + + "/" + + constants.ALIGNED_EVENT_MAP_TEMPLATE.format( + dtag=dtag, chain=chain, residue=residue, site=site_id + ) ) # Save the output file @@ -2308,10 +2274,10 @@ def init(self, source_dir: str): return system_data def add_data_source( - self, - source_dir: str, - data_source_dir: str, - source_type: str = "model_building", + self, + source_dir: str, + data_source_dir: str, + source_type: str = "model_building", ): _source_dir = Path(source_dir) _data_source_dir = Path(data_source_dir) @@ -2337,9 +2303,9 @@ def 
parse_data_sources(self, source_dir: str): _parse_data_sources(_source_dir) def open_site( - self, - option_json: str, - site_id: int, + self, + option_json: str, + site_id: int, ): options = Options.parse_file(option_json) output = Output.read(Path(options.source_dir) / constants.OUTPUT_JSON_PATH) @@ -2395,9 +2361,9 @@ def pretty_print_dataset(self, source_dir: str): print(system_data) def align( - self, - system_data_dir: str, - source_dir: str, + self, + system_data_dir: str, + source_dir: str, ): self.build_system_data(system_data_dir, source_dir) self.build_graph(source_dir) @@ -2416,8 +2382,8 @@ def align( # self.align_xmaps(source_dir) def build_graph( - self, - source_dir: str, + self, + source_dir: str, ): _source_dir: Path = Path(source_dir) @@ -2441,13 +2407,13 @@ def change_site_reference(self, source_dir: str, site_id: int, subsite_id: int): _change_site_reference(_source_dir, site_id, subsite_id) def change_subsite_reference( - self, - source_dir: str, - site_id: int, - subsite_id: int, - dtag: int, - chain: str, - residue: int, + self, + source_dir: str, + site_id: int, + subsite_id: int, + dtag: int, + chain: str, + residue: int, ): _source_dir: Path = Path(source_dir) diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index 9b6fae7c..29efdff4 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -42,7 +42,7 @@ FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" ASSEMBLIES_YAML_FILE_NAME = "assemblies.yaml" -XTALFORMS_YAML_FILE_NAME = "xtalforms.yaml" +XTALFORMS_YAML_FILEπ_NAME = "xtalforms.yaml" ASSIGNED_XTALFORMS_YAML_FILE_NAME = "assigned_xtalforms.yaml" NEIGHBOURHOODS_YAML_FILE_NAME = "neighbourhoods.yaml" CONNECTED_COMPONENTS_YAML_NAME = "connected_components.yaml" @@ -56,4 +56,4 @@ ALIGNED_REFERENCE_STRUCTURE_TEMPLATE = "{dtag}_{site}_ref.pdb" ALIGNED_REFERENCE_STRUCTURE_ARTEFACTS_TEMPLATE = "{dtag}_{site}_artefacts_ref.pdb"
-ALIGNED_REFERENCE_XMAP_TEMPLATE = "{dtag}_{site}_ref.ccp4" \ No newline at end of file +ALIGNED_REFERENCE_XMAP_TEMPLATE = "{dtag}_{site}_ref.ccp4" diff --git a/src/ligand_neighbourhood_alignment/data.py b/src/ligand_neighbourhood_alignment/data.py index c9a95c1b..926a6062 100644 --- a/src/ligand_neighbourhood_alignment/data.py +++ b/src/ligand_neighbourhood_alignment/data.py @@ -526,8 +526,6 @@ def get_box(neighbourhood: LigandNeighbourhood, xmap, transform): return box - - def write_xmap(xmap, path: Path, neighbourhood: LigandNeighbourhood, transform): ccp4 = gemmi.Ccp4Map() @@ -548,9 +546,6 @@ def write_xmap(xmap, path: Path, neighbourhood: LigandNeighbourhood, transform): ccp4.write_ccp4_map(str(path)) - - - def read_graph(path: Path): g = nx.read_gml( str(path / constants.ALIGNABILITY_GRAPH_FILE_NAME), diff --git a/src/ligand_neighbourhood_alignment/dt.py b/src/ligand_neighbourhood_alignment/dt.py index 8ddefd2c..581e1c21 100644 --- a/src/ligand_neighbourhood_alignment/dt.py +++ b/src/ligand_neighbourhood_alignment/dt.py @@ -7,7 +7,8 @@ import pandas as pd import yaml from loguru import logger -logger.remove() # for someone not familiar with the lib, whats going on here? + +logger.remove() # for someone not familiar with the lib, whats going on here? 
logger.add(sys.stdout, level="INFO") from ligand_neighbourhood_alignment import constants @@ -18,13 +19,14 @@ class LigandNeighbourhoodOutput: - def __init__(self, - aligned_structures: dict[str, str], - aligned_artefacts: dict[str, str], - aligned_xmaps: dict[str, str], - aligned_diff_maps: dict[str, str], + def __init__( + self, + aligned_structures: dict[str, str], + aligned_artefacts: dict[str, str], + aligned_xmaps: dict[str, str], + aligned_diff_maps: dict[str, str], aligned_event_maps: dict[str, str], - ): + ): self.aligned_structures = aligned_structures self.aligned_artefacts: dict[str, str] = aligned_artefacts self.aligned_xmaps: dict[str, str] = aligned_xmaps @@ -34,46 +36,56 @@ def __init__(self, @staticmethod def from_dict(dic, source_dir): return LigandNeighbourhoodOutput( - aligned_structures={k: Path(v) for k, v in dic["aligned_structures"].items()}, - aligned_artefacts={k: Path(v) for k, v in dic["aligned_artefacts"].items()}, - aligned_xmaps={k: Path(v) for k, v in dic["aligned_xmaps"].items()}, - aligned_diff_maps={k: Path(v) for k, v in dic["aligned_diff_maps"].items()}, - aligned_event_maps={k: Path(v) for k, v in dic["aligned_event_maps"].items()}, + aligned_structures={k: Path(v) for k, v in dic["aligned_structures"].items()}, + aligned_artefacts={k: Path(v) for k, v in dic["aligned_artefacts"].items()}, + aligned_xmaps={k: Path(v) for k, v in dic["aligned_xmaps"].items()}, + aligned_diff_maps={k: Path(v) for k, v in dic["aligned_diff_maps"].items()}, + aligned_event_maps={k: Path(v) for k, v in dic["aligned_event_maps"].items()}, ) def to_dict(self): dic = { - 'aligned_structures': {canonical_site_id:str(path) for canonical_site_id, path in self.aligned_structures.items()}, - 'aligned_artefacts': {canonical_site_id:str(path) for canonical_site_id, path in self.aligned_artefacts.items()}, - 'aligned_xmaps': {canonical_site_id:str(path) for canonical_site_id, path in self.aligned_xmaps.items()}, - 'aligned_diff_maps': 
{canonical_site_id:str(path) for canonical_site_id, path in self.aligned_diff_maps.items()}, - 'aligned_event_maps': {canonical_site_id:str(path) for canonical_site_id, path in self.aligned_event_maps.items()}, + "aligned_structures": { + canonical_site_id: str(path) for canonical_site_id, path in self.aligned_structures.items() + }, + "aligned_artefacts": { + canonical_site_id: str(path) for canonical_site_id, path in self.aligned_artefacts.items() + }, + "aligned_xmaps": {canonical_site_id: str(path) for canonical_site_id, path in self.aligned_xmaps.items()}, + "aligned_diff_maps": { + canonical_site_id: str(path) for canonical_site_id, path in self.aligned_diff_maps.items() + }, + "aligned_event_maps": { + canonical_site_id: str(path) for canonical_site_id, path in self.aligned_event_maps.items() + }, } return dic + def symlink(old_path, new_path): os.symlink(old_path.resolve(), new_path) + class FSModel: def __init__( - self, - source_dir, - fs_model, - # assemblies, - xtalforms, - dataset_assignments, - ligand_neighbourhoods, - alignability_graph, - connected_components, - ligand_neighbourhood_transforms, - conformer_sites, - conformer_site_transforms, - canonical_sites, - # canonical_site_transforms, - xtalform_sites, - reference_structure_transforms, - alignments, - reference_alignments + self, + source_dir, + fs_model, + # assemblies, + xtalforms, + dataset_assignments, + ligand_neighbourhoods, + alignability_graph, + connected_components, + ligand_neighbourhood_transforms, + conformer_sites, + conformer_site_transforms, + canonical_sites, + # canonical_site_transforms, + xtalform_sites, + reference_structure_transforms, + alignments, + reference_alignments, ): self.source_dir = source_dir self.fs_model = fs_model @@ -96,7 +108,7 @@ def __init__( def symlink_old_data(self): for dtag, dataset_alignments in self.alignments.items(): if not (self.source_dir / constants.ALIGNED_FILES_DIR / dtag).exists(): - os.mkdir(self.source_dir / constants.ALIGNED_FILES_DIR 
/ dtag ) + os.mkdir(self.source_dir / constants.ALIGNED_FILES_DIR / dtag) for chain, chain_alignments in dataset_alignments.items(): for residue, ligand_neighbourhood_alignments in chain_alignments.items(): for canonical_site_id in ligand_neighbourhood_alignments.aligned_structures: @@ -124,35 +136,34 @@ def symlink_old_data(self): # Symlink old alignments for dtag, dtag_alignment_info in self.reference_alignments.items(): if not (self.source_dir / constants.ALIGNED_FILES_DIR / dtag).exists(): - os.mkdir(self.source_dir / constants.ALIGNED_FILES_DIR / dtag ) + os.mkdir(self.source_dir / constants.ALIGNED_FILES_DIR / dtag) for canonical_site_id, canonical_site_alignment_info in dtag_alignment_info.items(): - old_path= Path(canonical_site_alignment_info['aligned_structures']) + old_path = Path(canonical_site_alignment_info["aligned_structures"]) new_path = self.source_dir / constants.ALIGNED_FILES_DIR / dtag / old_path.name if not new_path.exists(): symlink(old_path, new_path) - old_path = Path(canonical_site_alignment_info['aligned_artefacts']) + old_path = Path(canonical_site_alignment_info["aligned_artefacts"]) new_path = self.source_dir / constants.ALIGNED_FILES_DIR / dtag / old_path.name if not new_path.exists(): symlink(old_path, new_path) - old_path = Path(canonical_site_alignment_info['aligned_xmaps']) + old_path = Path(canonical_site_alignment_info["aligned_xmaps"]) new_path = self.source_dir / constants.ALIGNED_FILES_DIR / dtag / old_path.name if not new_path.exists(): symlink(old_path, new_path) - @staticmethod def from_dir( - source_dir: str, - # output_dir: str, + source_dir: str, + # output_dir: str, ): source_dir = Path(source_dir) # output_dir = Path(output_dir) fs_model = source_dir / constants.FS_MODEL_YAML_FILE_NAME if fs_model.exists(): - with open(fs_model, 'r') as f: + with open(fs_model, "r") as f: dic = yaml.safe_load(f) if dic is not None: return FSModel.from_dict(dic) @@ -190,7 +201,7 @@ def from_dir( xtalform_sites, 
reference_structure_transforms, alignments, - reference_alignments + reference_alignments, ) @staticmethod @@ -207,7 +218,8 @@ def from_dict(dic): for version, ligand_neighbourhood_alignments in residue_alignments.items(): # _dtag, _chain, _residue = ligand_neighbourhood.split("/") alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.from_dict( - ligand_neighbourhood_alignments, source_dir) + ligand_neighbourhood_alignments, source_dir + ) # reference_alignments = {} # for dtag, dataset_alignments in alignments["reference_alignments"].items(): @@ -223,31 +235,33 @@ def from_dict(dic): reference_alignments[dtag] = {} for canonical_site_id, canonical_site_alignment_info in canonical_site_alignments.items(): reference_alignments[dtag][canonical_site_id] = { - 'aligned_structures': Path(canonical_site_alignment_info['aligned_structures']), - 'aligned_artefacts': Path(canonical_site_alignment_info['aligned_artefacts']), - 'aligned_xmaps': Path(canonical_site_alignment_info['aligned_xmaps']) + "aligned_structures": Path(canonical_site_alignment_info["aligned_structures"]), + "aligned_artefacts": Path(canonical_site_alignment_info["aligned_artefacts"]), + "aligned_xmaps": Path(canonical_site_alignment_info["aligned_xmaps"]), } return FSModel( source_dir=Path(dic["source_dir"]), - fs_model=Path(dic['fs_model']), - xtalforms=Path(dic['crystalforms']), - dataset_assignments=Path(dic['dataset_assignments']), - ligand_neighbourhoods=Path(dic['ligand_neighbourhoods']), - alignability_graph=Path(dic['alignability_graph']), - connected_components=Path(dic['connected_components']), - ligand_neighbourhood_transforms=Path(dic['ligand_neighbourhood_transforms']), - conformer_sites=Path(dic['conformer_sites']), - conformer_site_transforms=Path(dic['conformer_site_transforms']), - canonical_sites=Path(dic['canonical_sites']), + fs_model=Path(dic["fs_model"]), + xtalforms=Path(dic["crystalforms"]), + dataset_assignments=Path(dic["dataset_assignments"]), + 
ligand_neighbourhoods=Path(dic["ligand_neighbourhoods"]), + alignability_graph=Path(dic["alignability_graph"]), + connected_components=Path(dic["connected_components"]), + ligand_neighbourhood_transforms=Path(dic["ligand_neighbourhood_transforms"]), + conformer_sites=Path(dic["conformer_sites"]), + conformer_site_transforms=Path(dic["conformer_site_transforms"]), + canonical_sites=Path(dic["canonical_sites"]), # canonical_site_transforms=Path(dic['canonical_site_transforms']), - xtalform_sites=Path(dic['xtalform_sites']), - reference_structure_transforms=Path(dic['reference_structure_transforms']), + xtalform_sites=Path(dic["xtalform_sites"]), + reference_structure_transforms=Path(dic["reference_structure_transforms"]), alignments=alignments, reference_alignments=reference_alignments, ) - def to_dict(self, ): + def to_dict( + self, + ): dic = {} alignments = {} for dtag, dataset_alignments in self.alignments.items(): @@ -258,64 +272,64 @@ def to_dict(self, ): alignments[dtag][chain][residue] = {} for version, ligand_neighbourhood_alignments in residue_alignments.items(): alignments[dtag][chain][residue][version] = LigandNeighbourhoodOutput.to_dict( - ligand_neighbourhood_alignments) + ligand_neighbourhood_alignments + ) reference_alignments = {} for dtag, dtag_alignment_info in self.reference_alignments.items(): reference_alignments[dtag] = {} for canonical_site_id, canonical_site_alignment_info in dtag_alignment_info.items(): reference_alignments[dtag][canonical_site_id] = { - 'aligned_structures': str(canonical_site_alignment_info['aligned_structures']), - 'aligned_artefacts': str(canonical_site_alignment_info['aligned_artefacts']), - 'aligned_xmaps': str(canonical_site_alignment_info['aligned_xmaps']) + "aligned_structures": str(canonical_site_alignment_info["aligned_structures"]), + "aligned_artefacts": str(canonical_site_alignment_info["aligned_artefacts"]), + "aligned_xmaps": str(canonical_site_alignment_info["aligned_xmaps"]), } return { - 'source_dir': 
str(self.source_dir), - 'fs_model': str(self.fs_model), - 'crystalforms': str(self.xtalforms), - 'dataset_assignments': str(self.dataset_assignments), - 'ligand_neighbourhoods': str(self.ligand_neighbourhoods), - 'alignability_graph': str(self.alignability_graph), - 'connected_components': str(self.connected_components), - 'ligand_neighbourhood_transforms': str(self.ligand_neighbourhood_transforms), - 'conformer_sites': str(self.conformer_sites), - 'conformer_site_transforms': str(self.conformer_site_transforms), - 'canonical_sites': str(self.canonical_sites), + "source_dir": str(self.source_dir), + "fs_model": str(self.fs_model), + "crystalforms": str(self.xtalforms), + "dataset_assignments": str(self.dataset_assignments), + "ligand_neighbourhoods": str(self.ligand_neighbourhoods), + "alignability_graph": str(self.alignability_graph), + "connected_components": str(self.connected_components), + "ligand_neighbourhood_transforms": str(self.ligand_neighbourhood_transforms), + "conformer_sites": str(self.conformer_sites), + "conformer_site_transforms": str(self.conformer_site_transforms), + "canonical_sites": str(self.canonical_sites), # 'canonical_site_transforms': str(self.canonical_site_transforms), - 'xtalform_sites': str(self.xtalform_sites), - 'reference_structure_transforms': str(self.reference_structure_transforms), - 'alignments': alignments, - 'reference_alignments': reference_alignments, + "xtalform_sites": str(self.xtalform_sites), + "reference_structure_transforms": str(self.reference_structure_transforms), + "alignments": alignments, + "reference_alignments": reference_alignments, } class Datasource: - def __init__(self, - path: str, - datasource_type: str - ): + def __init__(self, path: str, datasource_type: str): self.path = path self.datasource_type = datasource_type class PanDDA: - def __init__(self, - path: str, - # event_table_path: str - ): + def __init__( + self, + path: str, + # event_table_path: str + ): self.path = Path(path) 
self.event_table_path = self.path / constants.PANDDA_ANALYSES_DIR / constants.PANDDA_EVENTS_INSPECT_TABLE_PATH class LigandBindingEvent: - def __init__(self, - id, - dtag, - chain, - residue, - xmap, - ): + def __init__( + self, + id, + dtag, + chain, + residue, + xmap, + ): self.id: str = id self.dtag: str = dtag self.chain: str = chain @@ -324,13 +338,14 @@ def __init__(self, class Dataset: - def __init__(self, - dtag, - pdb, - xmap, - mtz, - ligand_binding_events: dict[tuple[str, str, str], LigandBindingEvent], - ): + def __init__( + self, + dtag, + pdb, + xmap, + mtz, + ligand_binding_events: dict[tuple[str, str, str], LigandBindingEvent], + ): self.dtag = dtag self.pdb = pdb self.xmap = xmap @@ -339,22 +354,22 @@ def __init__(self, class SourceDataModel: - - def __init__(self, - fs_model: FSModel, - datasources: list[Datasource], - panddas: list[PanDDA], - ): + def __init__( + self, + fs_model: FSModel, + datasources: list[Datasource], + panddas: list[PanDDA], + ): self.fs_model = fs_model self.datasources = datasources self.panddas = panddas @staticmethod def from_fs_model( - fs_model: FSModel, - datasources, - datasource_types, - panddas, + fs_model: FSModel, + datasources, + datasource_types, + panddas, ): _datasources = [] for datasource_path, datasource_type in zip(datasources, datasource_types): @@ -366,11 +381,7 @@ def from_fs_model( pandda = PanDDA(pandda_dir) _panddas.append(pandda) - return SourceDataModel( - fs_model, - _datasources, - _panddas - ) + return SourceDataModel(fs_model, _datasources, _panddas) ... 
def get_datasets(self): @@ -475,147 +486,101 @@ def to_dict(self, path: Path): class Generator: - def __init__( - self, - biomol: str, - chain: str, - triplet: str - ): + def __init__(self, biomol: str, chain: str, triplet: str): self.biomol: str = biomol self.chain: str = chain self.triplet: str = triplet class Assembly: - def __init__(self, - reference: str, - generators: list[Generator] - ): + def __init__(self, reference: str, generators: list[Generator]): self.reference = reference self.generators = generators @staticmethod def from_dict(dic): - reference = dic['reference'] - biomol = dic['biomol'] - chains = dic['chains'] + reference = dic["reference"] + biomol = dic["biomol"] + chains = dic["chains"] # Split biomol on commas and strip whitespace - biomol_matches = re.findall( - '([A-Z]+)', - biomol - ) + biomol_matches = re.findall("([A-Z]+)", biomol) # Split chains on commas that do not follow a number, x,y or z and strip whitespace - chain_matches = re.findall( - '(([A-Z]+)([(]+[^()]+[)]+)*)', - chains - ) + chain_matches = re.findall("(([A-Z]+)([(]+[^()]+[)]+)*)", chains) print(biomol_matches) print(chain_matches) # Make generators generators = [] for biomol_match, chain_match in zip(biomol_matches, chain_matches): if len(chain_match[2]) == 0: - xyz = 'x,y,z' + xyz = "x,y,z" else: xyz = chain_match[2][1:-1] - generators.append( - Generator( - biomol_match, - chain_match[1], - xyz - ) - ) - - return Assembly( - reference, - generators - ) + generators.append(Generator(biomol_match, chain_match[1], xyz)) + + return Assembly(reference, generators) class XtalFormAssembly: - def __init__( - self, - assembly: str, - chains: list[str], - transforms: list[str] - ): + def __init__(self, assembly: str, chains: list[str], transforms: list[str]): self.assembly = assembly self.chains = chains self.transforms = transforms class XtalForm: - def __init__(self, - reference: str, - assemblies: dict[str, XtalFormAssembly] - ): + def __init__(self, reference: str, 
assemblies: dict[str, XtalFormAssembly]): self.reference = reference self.assemblies = assemblies @staticmethod def from_dict(dic): - reference = dic['reference'] - assemblies = dic['assemblies'] + reference = dic["reference"] + assemblies = dic["assemblies"] _assemblies = {} for xtalform_assembly_id, xtalform_assembly_info in assemblies.items(): assemblies[xtalform_assembly_id] = {} - assembly = xtalform_assembly_info['assembly'] - chains = xtalform_assembly_info['chains'] - chains_matches = re.findall( - '(([A-Z]+)([(]+[^()]+[)]+)*)', - chains - ) + assembly = xtalform_assembly_info["assembly"] + chains = xtalform_assembly_info["chains"] + chains_matches = re.findall("(([A-Z]+)([(]+[^()]+[)]+)*)", chains) _chains = [] _transforms = [] for chain_match in chains_matches: _chains.append(chain_match[1]) if len(chain_match[2]) == 0: - xyz = 'x,y,z' + xyz = "x,y,z" else: xyz = chain_match[2][1:-1] _transforms.append(xyz) - _assemblies[xtalform_assembly_id] = XtalFormAssembly( - assembly, - _chains, - _transforms - ) + _assemblies[xtalform_assembly_id] = XtalFormAssembly(assembly, _chains, _transforms) return XtalForm(reference, _assemblies) class Transform: - def __init__(self, - vec, - mat): + def __init__(self, vec, mat): self.vec: list[float] = vec self.mat: list[list[float]] = mat @staticmethod def from_dict(dic): - return Transform( - dic['vec'], - dic['mat'] - ) + return Transform(dic["vec"], dic["mat"]) def to_dict(self): - return { - 'vec': self.vec, - 'mat': self.mat - } + return {"vec": self.vec, "mat": self.mat} class Atom: def __init__( - self, - element: str, - x: float, - y: float, - z: float, - image: Transform, + self, + element: str, + x: float, + y: float, + z: float, + image: Transform, ): self.element: str = element self.x: float = x @@ -625,30 +590,15 @@ def __init__( @staticmethod def from_dict(dic): - return Atom( - dic["element"], - dic["x"], - dic["y"], - dic["z"], - Transform.from_dict(dic["image"]) - ) + return Atom(dic["element"], dic["x"], 
dic["y"], dic["z"], Transform.from_dict(dic["image"])) def to_dict(self): dic = {} - return { - "element": self.element, - 'x': self.x, - 'y': self.y, - 'z': self.z, - 'image': self.image.to_dict() - } + return {"element": self.element, "x": self.x, "y": self.y, "z": self.z, "image": self.image.to_dict()} class Neighbourhood: - def __init__(self, - atoms: dict[tuple[str, str, str], Atom], - artefact_atoms: dict[tuple[str, str, str], Atom] - ): + def __init__(self, atoms: dict[tuple[str, str, str], Atom], artefact_atoms: dict[tuple[str, str, str], Atom]): self.atoms = atoms self.artefact_atoms = artefact_atoms @@ -657,8 +607,8 @@ def from_dict(dic): atoms = {} artefact_atoms = {} - _atoms = dic['atoms'] - _artefact_atoms = dic['artefact_atoms'] + _atoms = dic["atoms"] + _artefact_atoms = dic["artefact_atoms"] for atom_id, atom_info in _atoms.items(): chain, residue, atom = atom_id.split("/") atoms[(chain, residue, atom)] = Atom.from_dict(atom_info) @@ -668,19 +618,16 @@ def from_dict(dic): chain, residue, atom = atom_id.split("/") artefact_atoms[(chain, residue, atom)] = Atom.from_dict(atom_info) - return Neighbourhood( - atoms, - artefact_atoms - ) + return Neighbourhood(atoms, artefact_atoms) def to_dict(self): dic = {} - dic['atoms'] = {} + dic["atoms"] = {} for atom_id, atom in self.atoms.items(): - dic['atoms']["/".join(atom_id)] = atom.to_dict() - dic['artefact_atoms'] = {} + dic["atoms"]["/".join(atom_id)] = atom.to_dict() + dic["artefact_atoms"] = {} for atom_id, atom in self.artefact_atoms.items(): - dic['artefact_atoms']["/".join(atom_id)] = atom.to_dict() + dic["artefact_atoms"]["/".join(atom_id)] = atom.to_dict() return dic @@ -691,10 +638,10 @@ class AlignabilityGraph: class ConformerSite: def __init__( - self, - residues: list[tuple[str, str]], - members: list[tuple[str, str, str, str]], - reference_ligand_id: tuple[str, str, str, str] + self, + residues: list[tuple[str, str]], + members: list[tuple[str, str, str, str]], + reference_ligand_id: 
tuple[str, str, str, str], ): self.residues: list[tuple[str, str]] = residues self.members: list[tuple[str, str, str, str]] = members @@ -703,36 +650,34 @@ def __init__( @staticmethod def from_dict(dic): residues = [] - for res in dic['residues']: + for res in dic["residues"]: chain, residue, name = res.split("/") residues.append((chain, residue, name)) members = [] - for member in dic['members']: + for member in dic["members"]: dtag, chain, residue, version = member.split("/") members.append((dtag, chain, residue, version)) ref_dtag, ref_chain, ref_residue, version = dic["reference_ligand_id"].split("/") - return ConformerSite( - residues, - members, - (ref_dtag, ref_chain, ref_residue, version) - ) + return ConformerSite(residues, members, (ref_dtag, ref_chain, ref_residue, version)) - def to_dict(self, ): + def to_dict( + self, + ): return { - 'residues': ["/".join(resid) for resid in self.residues], - 'members': ["/".join(lid) for lid in self.members], - 'reference_ligand_id': "/".join(self.reference_ligand_id) + "residues": ["/".join(resid) for resid in self.residues], + "members": ["/".join(lid) for lid in self.members], + "reference_ligand_id": "/".join(self.reference_ligand_id), } class CanonicalSite: def __init__( - self, - conformer_site_ids: list[str], - residues: list[tuple[str, str]], - reference_conformer_site_id: str, - global_reference_dtag: str, - centroid_res: tuple[str,str,str, str] + self, + conformer_site_ids: list[str], + residues: list[tuple[str, str]], + reference_conformer_site_id: str, + global_reference_dtag: str, + centroid_res: tuple[str, str, str, str], ): self.conformer_site_ids: list[str] = conformer_site_ids self.residues: list[tuple[str, str]] = residues @@ -743,36 +688,36 @@ def __init__( @staticmethod def from_dict(dic): residues = [] - for res in dic['residues']: + for res in dic["residues"]: chain, residue, name = res.split("/") residues.append((chain, residue, name)) return CanonicalSite( - dic['conformer_site_ids'], + 
dic["conformer_site_ids"], residues, - dic['reference_conformer_site_id'], - dic['global_reference_dtag'], - dic['centroid_res'].split('/') - + dic["reference_conformer_site_id"], + dic["global_reference_dtag"], + dic["centroid_res"].split("/"), ) def to_dict(self): return { - 'conformer_site_ids': self.conformer_site_ids, - 'residues': ["/".join(res) for res in self.residues], - 'reference_conformer_site_id': self.reference_conformer_site_id, - 'global_reference_dtag': self.global_reference_dtag, - 'centroid_res': '/'.join(self.centroid_res) + "conformer_site_ids": self.conformer_site_ids, + "residues": ["/".join(res) for res in self.residues], + "reference_conformer_site_id": self.reference_conformer_site_id, + "global_reference_dtag": self.global_reference_dtag, + "centroid_res": "/".join(self.centroid_res), } class XtalFormSite: - def __init__(self, - xtalform_id: str, - crystallographic_chain: str, - canonical_site_id: str, - members: list[tuple[str, str, str]] - ): + def __init__( + self, + xtalform_id: str, + crystallographic_chain: str, + canonical_site_id: str, + members: list[tuple[str, str, str]], + ): self.xtalform_id: str = xtalform_id self.crystallographic_chain: str = crystallographic_chain self.canonical_site_id: str = canonical_site_id @@ -781,23 +726,18 @@ def __init__(self, @staticmethod def from_dict(dic): members = [] - for member in dic['members']: + for member in dic["members"]: dtag, chain, residue, version = member.split("/") members.append((dtag, chain, residue, version)) - return XtalFormSite( - dic['xtalform_id'], - dic['crystallographic_chain'], - dic['canonical_site_id'], - members - ) + return XtalFormSite(dic["xtalform_id"], dic["crystallographic_chain"], dic["canonical_site_id"], members) def to_dict(self): dic = {} - dic['members'] = [] + dic["members"] = [] for member in self.members: - dic['members'].append("/".join(member)) + dic["members"].append("/".join(member)) - dic['xtalform_id'] = self.xtalform_id - 
dic['crystallographic_chain'] = self.crystallographic_chain - dic['canonical_site_id'] = self.canonical_site_id + dic["xtalform_id"] = self.xtalform_id + dic["crystallographic_chain"] = self.crystallographic_chain + dic["canonical_site_id"] = self.canonical_site_id return dic diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 359af45b..be28141d 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -212,15 +212,15 @@ def expand_structure(_structure, xtalforms: AssignedXtalForms, moving_ligand_id) def align_structure( - _structure, - moving_ligand_id, - reference_ligand_id, - g, - transforms, - site_transforms: SiteTransforms, - canonical_site_id, - conformer_site_id, - out_path, + _structure, + moving_ligand_id, + reference_ligand_id, + g, + transforms, + site_transforms: SiteTransforms, + canonical_site_id, + conformer_site_id, + out_path, ): shortest_path = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") @@ -264,18 +264,18 @@ def align_structure( def _align_structure( - _structure, - moving_ligand_id: tuple[str, str, str], - reference_ligand_id: tuple[str, str, str], - neighbourhood: dt.Neighbourhood, - g, - neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], - conformer_site_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id: str, - conformer_site_id: str, - xtalform: dt.XtalForm, - out_path: Path, + _structure, + moving_ligand_id: tuple[str, str, str], + reference_ligand_id: tuple[str, str, str], + neighbourhood: dt.Neighbourhood, + g, + neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], + conformer_site_transforms: dict[tuple[str, str], 
dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + conformer_site_id: str, + xtalform: dt.XtalForm, + out_path: Path, ): shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") @@ -286,16 +286,21 @@ def _align_structure( # Get the transform from previous frame to new one # Transform is 2 onto 1 if next_ligand_id != previous_ligand_id: - transform = transform_to_gemmi(neighbourhood_transforms[(next_ligand_id, previous_ligand_id,)]) + transform = transform_to_gemmi( + neighbourhood_transforms[ + ( + next_ligand_id, + previous_ligand_id, + ) + ] + ) running_transform = transform.combine(running_transform) # Apply the translation to the new frame previous_ligand_id = next_ligand_id # Subsite alignment transform - confomer_site_transform = transform_to_gemmi( - conformer_site_transforms[(canonical_site_id, conformer_site_id)] - ) + confomer_site_transform = transform_to_gemmi(conformer_site_transforms[(canonical_site_id, conformer_site_id)]) running_transform = confomer_site_transform.combine(running_transform) # Site alignment transform @@ -309,17 +314,16 @@ def _align_structure( # Drop chains without atoms in neighbourhood neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) chain_assemblies = { - _chain: _assembly - for _assembly_name, _assembly - in xtalform.assemblies.items() - for _chain in _assembly.chains + _chain: _assembly for _assembly_name, _assembly in xtalform.assemblies.items() for _chain in _assembly.chains } lig_assembly = chain_assemblies[moving_ligand_id[1]] for _model in _structure: for _chain in _model: - if (_chain.name not in lig_assembly.chains) & (_chain.name not in neighbourhood_chains): # Remove any chain the ligand isn't modelled onto + if (_chain.name not in lig_assembly.chains) & ( + _chain.name not in neighbourhood_chains + ): # Remove any chain the ligand isn't modelled 
onto _model.remove_chain(_chain.name) @@ -328,18 +332,18 @@ def _align_structure( def _align_reference_structure( - _structure, - dtag: str, - # moving_ligand_id: tuple[str,str,str], - # reference_ligand_id: tuple[str,str,str], - # g, - # neighbourhood_transforms: dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], - # conformer_site_transforms: dict[tuple[str, str], dt.Transform], - reference_structure_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id: str, - # conformer_site_id: str, - out_path: Path, + _structure, + dtag: str, + # moving_ligand_id: tuple[str,str,str], + # reference_ligand_id: tuple[str,str,str], + # g, + # neighbourhood_transforms: dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], + # conformer_site_transforms: dict[tuple[str, str], dt.Transform], + reference_structure_transforms: dict[tuple[str, str], dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + # conformer_site_id: str, + out_path: Path, ): running_transform = transform_to_gemmi(reference_structure_transforms[(dtag, canonical_site_id)]) @@ -377,17 +381,17 @@ def _align_reference_structure( def _align_structures_from_sites( - structures, - canonical_sites: CanonicalSites, - conformer_sites: ConformerSites, - transforms: Transforms, - neighbourhoods: LigandNeighbourhoods, - xtalforms: XtalForms, - assigned_xtalforms: AssignedXtalForms, - g, - site_transforms: SiteTransforms, - # _output_dir: Path, - output: Output, + structures, + canonical_sites: CanonicalSites, + conformer_sites: ConformerSites, + transforms: Transforms, + neighbourhoods: LigandNeighbourhoods, + xtalforms: XtalForms, + assigned_xtalforms: AssignedXtalForms, + g, + site_transforms: SiteTransforms, + # _output_dir: Path, + output: Output, ): # asd = _output_dir / "aligned" # if not asd.exists(): @@ -429,10 +433,10 @@ def _align_structures_from_sites( # Get output path 
aod = Path(output.source_dir) output_path = ( - aod - / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ - moving_ligand_id.residue - ].aligned_structures[canonical_site_id] + aod + / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ + moving_ligand_id.residue + ].aligned_structures[canonical_site_id] ) # Align the ligand align_structure( diff --git a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py index 6337c76b..3b34264c 100644 --- a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py +++ b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py @@ -27,8 +27,11 @@ ) # from ligand_neighbourhood_alignment.save_sites import save_sites -from ligand_neighbourhood_alignment.structures import get_structures, get_transform_from_residues, \ - _get_transform_from_residues +from ligand_neighbourhood_alignment.structures import ( + get_structures, + get_transform_from_residues, + _get_transform_from_residues, +) def get_components(g): @@ -151,9 +154,9 @@ def get_sites_from_conformer_sites(conformer_sites: ConformerSites, neighbourhoo def get_xtalform_sites_from_canonical_sites( - canonical_sites: CanonicalSites, - assigned_xtalforms: AssignedXtalForms, - xtalforms: XtalForms, + canonical_sites: CanonicalSites, + assigned_xtalforms: AssignedXtalForms, + xtalforms: XtalForms, ): """ Each canonical site may occur in several forms, depending on the @@ -232,10 +235,10 @@ def get_subsite_transforms(sites: CanonicalSites, structures): def _update_conformer_site_transforms( - conformer_site_transforms, - canonical_site: dt.CanonicalSite, - conformer_sites: dict[str, dt.ConformerSite], - structures, + conformer_site_transforms, + canonical_site: dt.CanonicalSite, + conformer_sites: dict[str, dt.ConformerSite], + structures, ): ref_conformer_site = conformer_sites[canonical_site.reference_conformer_site_id] 
ref_conformer_site_residues = ref_conformer_site.residues @@ -290,17 +293,15 @@ def get_site_transforms(sites: CanonicalSites, structures): def _update_canonical_site_transforms( - canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id, - canonical_site: dt.CanonicalSite, - # canonical_sites: dict[str, dt.CanonicalSite], - conformer_sites: dict[str, dt.ConformerSite], - structures, + canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id, + canonical_site: dt.CanonicalSite, + # canonical_sites: dict[str, dt.CanonicalSite], + conformer_sites: dict[str, dt.ConformerSite], + structures, ): rss = structures[canonical_site.global_reference_dtag] - ref_site_all_ress = [ - (chain.name, res.seqid.num) for model in rss for chain in model for res in chain - ] + ref_site_all_ress = [(chain.name, res.seqid.num) for model in rss for chain in model for res in chain] srs = conformer_sites[canonical_site.reference_conformer_site_id].reference_ligand_id[0] site_structure = structures[srs] @@ -313,11 +314,11 @@ def _update_canonical_site_transforms( def _update_reference_structure_transforms( - reference_structure_transforms, - key, - structures, - canonical_site: dt.CanonicalSite, - conformer_sites: dict[str, dt.ConformerSite], + reference_structure_transforms, + key, + structures, + canonical_site: dt.CanonicalSite, + conformer_sites: dict[str, dt.ConformerSite], ): ress = [(x[0], x[1]) for x in canonical_site.residues] to_structure = structures[conformer_sites[canonical_site.reference_conformer_site_id].reference_ligand_id[0]] diff --git a/src/ligand_neighbourhood_alignment/get_alignability.py b/src/ligand_neighbourhood_alignment/get_alignability.py index f1d8b33c..a349cd69 100644 --- a/src/ligand_neighbourhood_alignment/get_alignability.py +++ b/src/ligand_neighbourhood_alignment/get_alignability.py @@ -25,9 +25,10 @@ def match_cas( ligand_1_atom_id, ligand_1_atom, ) in zip(ligand_1_neighbourhood.atom_ids, ligand_1_neighbourhood.atoms): - for 
(ligand_2_atom_id, ligand_2_atom,) in zip( - ligand_2_neighbourhood.atom_ids, ligand_2_neighbourhood.atoms - ): + for ( + ligand_2_atom_id, + ligand_2_atom, + ) in zip(ligand_2_neighbourhood.atom_ids, ligand_2_neighbourhood.atoms): if ligand_1_atom_id.atom == "CA": if match_atom(ligand_1_atom, ligand_2_atom, ignore_chain=True): alignable_cas.append( @@ -53,16 +54,13 @@ def match_cas( rmsd = sup.rmsd if rmsd < max_alignable_rmsd: - return True, Transform( - vec=sup.transform.vec.tolist(), mat=sup.transform.mat.tolist() - ) + return True, Transform(vec=sup.transform.vec.tolist(), mat=sup.transform.mat.tolist()) else: return False, None else: return False, None - def get_alignability( ligand_neighbourhoods: LigandNeighbourhoods, system_data: SystemData, @@ -88,9 +86,7 @@ def get_alignability( ligand_neighbourhoods.ligand_neighbourhoods, ): # See if atoms match - transform is frame 2 to frame 1 - ca_match, transform = match_cas( - ligand_1_neighbourhood, ligand_2_neighbourhood - ) + ca_match, transform = match_cas(ligand_1_neighbourhood, ligand_2_neighbourhood) if ca_match: connectivities.append(1) @@ -103,9 +99,8 @@ def get_alignability( logger.debug(connectivity) - return np.array(connectivity), Transforms( - ligand_ids=transform_ids, transforms=transforms - ) + return np.array(connectivity), Transforms(ligand_ids=transform_ids, transforms=transforms) + from ligand_neighbourhood_alignment import dt @@ -138,11 +133,7 @@ def _match_cas( ) if len(alignable_cas) >= min( - [ - min_alignable_atoms, - len(ligand_1_neighbourhood.atoms), - len(ligand_2_neighbourhood.atoms) - ] + [min_alignable_atoms, len(ligand_1_neighbourhood.atoms), len(ligand_2_neighbourhood.atoms)] ): sup = gemmi.superpose_positions( [alignable_ca[0] for alignable_ca in alignable_cas], @@ -153,22 +144,23 @@ def _match_cas( rmsd = sup.rmsd if rmsd < max_alignable_rmsd: - return True, dt.Transform( - vec=transform.vec.tolist(), mat=transform.mat.tolist() - ), dt.Transform( - 
vec=inverse_transform.vec.tolist(), mat=inverse_transform.mat.tolist() + return ( + True, + dt.Transform(vec=transform.vec.tolist(), mat=transform.mat.tolist()), + dt.Transform(vec=inverse_transform.vec.tolist(), mat=inverse_transform.mat.tolist()), ) else: return False, None, None else: return False, None, None + def _update_ligand_neighbourhood_transforms( - ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], - lid: tuple[str, str, str, str], - ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], - structures, - ): + ligand_neighbourhood_transforms: dict[tuple[tuple[str, str, str, str], tuple[str, str, str, str]], dt.Transform], + lid: tuple[str, str, str, str], + ligand_neighbourhoods: dict[tuple[str, str, str, str], dt.Neighbourhood], + structures, +): # connectivity = [] # transform_ids = [] # transforms = [] @@ -181,11 +173,12 @@ def _update_ligand_neighbourhood_transforms( ligand_1_id = lid ligand_1_neighbourhood = ligand_neighbourhoods[lid] matches = [] - for (ligand_2_id, ligand_2_neighbourhood,) in ligand_neighbourhoods.items(): + for ( + ligand_2_id, + ligand_2_neighbourhood, + ) in ligand_neighbourhoods.items(): # See if atoms match - transform is frame 2 to frame 1 - ca_match, transform, inverse_transform = _match_cas( - ligand_1_neighbourhood, ligand_2_neighbourhood - ) + ca_match, transform, inverse_transform = _match_cas(ligand_1_neighbourhood, ligand_2_neighbourhood) if ca_match: # connectivities.append(1) @@ -198,8 +191,8 @@ def _update_ligand_neighbourhood_transforms( if len(matches) == 0: rprint(f"No Matches For {ligand_1_id}! 
No alignments will be generated!") - # else: - # connectivities.append(0) + # else: + # connectivities.append(0) # connectivity.append(connectivities) diff --git a/src/ligand_neighbourhood_alignment/get_alignable_sites.py b/src/ligand_neighbourhood_alignment/get_alignable_sites.py index e698e02c..7bf0fa2a 100644 --- a/src/ligand_neighbourhood_alignment/get_alignable_sites.py +++ b/src/ligand_neighbourhood_alignment/get_alignable_sites.py @@ -2,13 +2,7 @@ def match_site(containing_site, contained_site): - if len( - [ - ligand_id - for ligand_id in contained_site - if ligand_id in containing_site - ] - ) == len(contained_site): + if len([ligand_id for ligand_id in contained_site if ligand_id in containing_site]) == len(contained_site): return True else: return False @@ -17,9 +11,7 @@ def match_site(containing_site, contained_site): # def get_alignable_sites_() -def get_alignable_sites( - connected_components, alignable_sites: list[AlignableSite] | None -): +def get_alignable_sites(connected_components, alignable_sites: list[AlignableSite] | None): alignable_site_num: int = 0 if alignable_sites: @@ -44,11 +36,7 @@ def get_alignable_sites( # If not site has matched create a new alignable site if not site_match: - alignable_sites.append( - AlignableSite( - id=alignable_site_num, name="", ligand_ids=site_ligand_ids - ) - ) + alignable_sites.append(AlignableSite(id=alignable_site_num, name="", ligand_ids=site_ligand_ids)) alignable_site_num += 1 return alignable_sites diff --git a/src/ligand_neighbourhood_alignment/get_canonical_sites.py b/src/ligand_neighbourhood_alignment/get_canonical_sites.py index 6ee9f787..73c3d999 100644 --- a/src/ligand_neighbourhood_alignment/get_canonical_sites.py +++ b/src/ligand_neighbourhood_alignment/get_canonical_sites.py @@ -34,9 +34,7 @@ def get_canonical_sites( for ligand_id, ligand_neighbourhood in ligand_neighbourhoods.items(): logger.debug(f"{ligand_id}") # Check if there is a match - match: int | None = match_neighbourhood_to_sites( 
- canonical_sites, ligand_neighbourhood - ) + match: int | None = match_neighbourhood_to_sites(canonical_sites, ligand_neighbourhood) # If so add the ligand id to the members of the canonical site if match: @@ -72,12 +70,8 @@ def get_canonical_sites( canonical_site_index, canonical_site_ligand_ids, ) in canonical_site_members.items(): - canonical_site: CanonicalSite = canonical_sites[ - canonical_site_index - ] - rematch: bool = match_neighbourhood_to_site( - canonical_site, ligand_neighbourhood - ) + canonical_site: CanonicalSite = canonical_sites[canonical_site_index] + rematch: bool = match_neighbourhood_to_site(canonical_site, ligand_neighbourhood) if rematch: if ligand_id not in canonical_site_ligand_ids: canonical_site_ligand_ids.append(ligand_id) diff --git a/src/ligand_neighbourhood_alignment/get_ligand_neighbourhoods.py b/src/ligand_neighbourhood_alignment/get_ligand_neighbourhoods.py index e0285d03..97e2fe46 100644 --- a/src/ligand_neighbourhood_alignment/get_ligand_neighbourhoods.py +++ b/src/ligand_neighbourhood_alignment/get_ligand_neighbourhoods.py @@ -44,8 +44,8 @@ def get_structure_fragments(dataset: Dataset, structure: Structure) -> dict[Liga def _get_model_and_artefact_atoms( - residue_neighbours: dict[tuple[float, float, float], gemmi.NeighborSearch.Mark], - structure: Structure, + residue_neighbours: dict[tuple[float, float, float], gemmi.NeighborSearch.Mark], + structure: Structure, ) -> tuple[list[gemmi.NeighborSearch.Mark], list[gemmi.NeighborSearch.Mark]]: # Check each mark for its image and partition them on this model_atoms: list[gemmi.NeighborSearch.Mark] = [] @@ -72,8 +72,8 @@ def _get_model_and_artefact_atoms( def __get_model_and_artefact_atoms( - residue_neighbours: dict[tuple[float, float, float], gemmi.NeighborSearch.Mark], - structure: Structure, + residue_neighbours: dict[tuple[float, float, float], gemmi.NeighborSearch.Mark], + structure: Structure, ) -> tuple[dict[gemmi.NeighborSearch.Mark, gemmi.CRA], 
dict[gemmi.NeighborSearch.Mark, gemmi.CRA]]: # Check each mark for its image and partition them on this model_atoms: dict[gemmi.NeighborSearch.Mark, gemmi.CRA] = {} @@ -100,9 +100,9 @@ def __get_model_and_artefact_atoms( def get_model_and_artefact_atoms( - residue_neighbours: list[tuple[gemmi.Position, gemmi.CRA]], - structure: Structure, - fragment, + residue_neighbours: list[tuple[gemmi.Position, gemmi.CRA]], + structure: Structure, + fragment, ) -> tuple[list[tuple[gemmi.Position, gemmi.CRA]], list[tuple[gemmi.Position, gemmi.CRA]]]: # Check each mark for its image and partition them on this model_atoms: list[tuple[gemmi.Position, gemmi.CRA]] = [] @@ -162,11 +162,11 @@ def get_model_and_artefact_atoms( def get_ligand_neighbourhood( - structure: Structure, - ns: gemmi.NeighborSearch, - fragment: gemmi.Residue, - min_dist: float = 0.01, - max_dist: float = 5.0, + structure: Structure, + ns: gemmi.NeighborSearch, + fragment: gemmi.Residue, + min_dist: float = 0.01, + max_dist: float = 5.0, ) -> LigandNeighbourhood: # For each atom, get the neighbouring atoms, and filter them on their # real space position @@ -290,11 +290,11 @@ def get_ligand_neighbourhood( def _get_ligand_neighbourhood( - structure, - ns: gemmi.NeighborSearch, - fragment: gemmi.Residue, - min_dist: float = 0.01, - max_dist: float = 5.0, + structure, + ns: gemmi.NeighborSearch, + fragment: gemmi.Residue, + min_dist: float = 0.01, + max_dist: float = 5.0, ): # For each atom, get the neighbouring atoms, and filter them on their # real space position @@ -401,16 +401,13 @@ def _get_ligand_neighbourhood( ) # Cosntruct the neighbourhood - ligand_neighbourhood = dt.Neighbourhood( - model_atoms, - artefact_atoms - ) + ligand_neighbourhood = dt.Neighbourhood(model_atoms, artefact_atoms) return ligand_neighbourhood def get_dataset_neighbourhoods( - dataset: Dataset, xtalform: XtalForm, max_radius: float = 7.0 + dataset: Dataset, xtalform: XtalForm, max_radius: float = 7.0 ) -> dict[LigandID, 
LigandNeighbourhood]: # Load the structure logger.debug(dataset.pdb) @@ -438,9 +435,9 @@ def get_dataset_neighbourhoods( def get_ligand_neighbourhoods( - system_data: SystemData, - xtalforms: XtalForms, - assigned_xtalforms: AssignedXtalForms, + system_data: SystemData, + xtalforms: XtalForms, + assigned_xtalforms: AssignedXtalForms, ) -> LigandNeighbourhoods: # Iterate over data, loading in structures, getting ligands for each # structure and finding their neighbourhoods diff --git a/src/ligand_neighbourhood_alignment/get_transforms.py b/src/ligand_neighbourhood_alignment/get_transforms.py index 157d9058..1677fca8 100644 --- a/src/ligand_neighbourhood_alignment/get_transforms.py +++ b/src/ligand_neighbourhood_alignment/get_transforms.py @@ -63,9 +63,7 @@ def get_transforms(ligand_neighbourhoods: LigandNeighbourhoods, g): ligand_neighbourhoods.ligand_neighbourhoods, ): if ligand_id_2 in g[ligand_id_1].neighbours(): - transform = get_transform( - ligand_neighbourhood_2, ligand_neighbourhood_1 - ) + transform = get_transform(ligand_neighbourhood_2, ligand_neighbourhood_1) transforms[ligand_id_1][ligand_id_2] = transform return Transforms(transforms=transforms) diff --git a/src/ligand_neighbourhood_alignment/make_data_json.py b/src/ligand_neighbourhood_alignment/make_data_json.py index d7e3f003..98d58549 100644 --- a/src/ligand_neighbourhood_alignment/make_data_json.py +++ b/src/ligand_neighbourhood_alignment/make_data_json.py @@ -43,6 +43,7 @@ def get_closest_lig(structure, coord): return min(distances, key=lambda x: distances[x]) + from ligand_neighbourhood_alignment import dt @@ -62,11 +63,7 @@ def get_ligand_binding_events_from_structure( if residue.name == "DMS": continue - lids.append( - LigandID( - dtag=dtag, chain=chain.name, residue=residue.seqid.num - ) - ) + lids.append(LigandID(dtag=dtag, chain=chain.name, residue=residue.seqid.num)) lbe = LigandBindingEvent( id=event_id, dtag=dtag, @@ -79,6 +76,7 @@ def get_ligand_binding_events_from_structure( return 
LigandBindingEvents(ligand_ids=lids, ligand_binding_events=lbes) + def _get_ligand_binding_events_from_structure( pdb_path: Path, xmap_path: Path, @@ -116,9 +114,7 @@ def get_ligand_binding_events_from_panddas(pandda_event_csvs, pdb_path, dtag): lbes = [] # Iterate the events, and if a match add a ligand binding event for pandda_path, event_table in pandda_event_csvs.items(): - processed_datasets_dir = ( - Path(pandda_path) / constants.PANDDA_PROCESSED_DATASETS_DIR - ) + processed_datasets_dir = Path(pandda_path) / constants.PANDDA_PROCESSED_DATASETS_DIR for idx, row in event_table.iterrows(): _dtag = row["dtag"] @@ -148,11 +144,8 @@ def get_ligand_binding_events_from_panddas(pandda_event_csvs, pdb_path, dtag): continue # Get the event map - xmap_path = ( - processed_dataset_dir - / constants.PANDDA_EVENT_MAP_TEMPLATE.format( - dtag=dtag, event_id=event_id, bdc=bdc - ) + xmap_path = processed_dataset_dir / constants.PANDDA_EVENT_MAP_TEMPLATE.format( + dtag=dtag, event_id=event_id, bdc=bdc ) lid = LigandID(dtag=dtag, chain=chain, residue=residue_num) lbe = LigandBindingEvent( @@ -174,9 +167,7 @@ def _get_ligand_binding_events_from_panddas(pandda_event_csvs, pdb_path, dtag): ligand_binding_events = {} # Iterate the events, and if a match add a ligand binding event for pandda_path, event_table in pandda_event_csvs.items(): - processed_datasets_dir = ( - Path(pandda_path) / constants.PANDDA_PROCESSED_DATASETS_DIR - ) + processed_datasets_dir = Path(pandda_path) / constants.PANDDA_PROCESSED_DATASETS_DIR for idx, row in event_table.iterrows(): _dtag = row["dtag"] @@ -206,11 +197,8 @@ def _get_ligand_binding_events_from_panddas(pandda_event_csvs, pdb_path, dtag): continue # Get the event map - xmap_path = ( - processed_dataset_dir - / constants.PANDDA_EVENT_MAP_TEMPLATE.format( - dtag=dtag, event_id=event_id, bdc=bdc - ) + xmap_path = processed_dataset_dir / constants.PANDDA_EVENT_MAP_TEMPLATE.format( + dtag=dtag, event_id=event_id, bdc=bdc ) lbe = dt.LigandBindingEvent( 
id=str(event_id), @@ -231,14 +219,10 @@ def make_data_json_from_pandda_dir(pandda_dir: Path, output_dir: Path): # Get the PanDDA dirs analyses_dir: Path = pandda_dir / constants.PANDDA_ANALYSES_DIR - processed_datasets_dir: Path = ( - pandda_dir / constants.PANDDA_PROCESSED_DATASETS_DIR - ) + processed_datasets_dir: Path = pandda_dir / constants.PANDDA_PROCESSED_DATASETS_DIR # Get the event table - event_table_path: Path = ( - analyses_dir / constants.PANDDA_EVENTS_INSPECT_TABLE_PATH - ) + event_table_path: Path = analyses_dir / constants.PANDDA_EVENTS_INSPECT_TABLE_PATH event_table = pd.read_csv(event_table_path) # Iterate the event table, pulling out associated ligands and @@ -261,12 +245,9 @@ def make_data_json_from_pandda_dir(pandda_dir: Path, output_dir: Path): # Get the structure processed_dataset_dir = processed_datasets_dir / dtag - final_structure_dir_path = ( - processed_dataset_dir / constants.PANDDA_FINAL_STRUCTURE_PDB_DIR - ) - final_structure_path = ( - final_structure_dir_path - / constants.PANDDA_FINAL_STRUCTURE_PDB_TEMPLATE.format(dtag=dtag) + final_structure_dir_path = processed_dataset_dir / constants.PANDDA_FINAL_STRUCTURE_PDB_DIR + final_structure_path = final_structure_dir_path / constants.PANDDA_FINAL_STRUCTURE_PDB_TEMPLATE.format( + dtag=dtag ) structure = gemmi.read_structure(str(final_structure_path)) @@ -277,11 +258,8 @@ def make_data_json_from_pandda_dir(pandda_dir: Path, output_dir: Path): continue # Get the event map - xmap_path = ( - processed_dataset_dir - / constants.PANDDA_EVENT_MAP_TEMPLATE.format( - dtag=dtag, event_id=event_id, bdc=bdc - ) + xmap_path = processed_dataset_dir / constants.PANDDA_EVENT_MAP_TEMPLATE.format( + dtag=dtag, event_id=event_id, bdc=bdc ) # If dtag not already processed, add @@ -301,12 +279,9 @@ def make_data_json_from_pandda_dir(pandda_dir: Path, output_dir: Path): dataset_ids = [] for dtag, events in initial_datasets.items(): processed_dataset_dir = processed_datasets_dir / dtag - 
final_structure_dir_path = ( - processed_dataset_dir / constants.PANDDA_FINAL_STRUCTURE_PDB_DIR - ) - final_structure_path = ( - final_structure_dir_path - / constants.PANDDA_FINAL_STRUCTURE_PDB_TEMPLATE.format(dtag=dtag) + final_structure_dir_path = processed_dataset_dir / constants.PANDDA_FINAL_STRUCTURE_PDB_DIR + final_structure_path = final_structure_dir_path / constants.PANDDA_FINAL_STRUCTURE_PDB_TEMPLATE.format( + dtag=dtag ) event_ids = [ LigandID( @@ -328,9 +303,7 @@ def make_data_json_from_pandda_dir(pandda_dir: Path, output_dir: Path): dataset_ids.append(DatasetID(dtag=dtag)) datasets.append(dataset) - system_data: SystemData = SystemData( - dataset_ids=dataset_ids, datasets=datasets - ) + system_data: SystemData = SystemData(dataset_ids=dataset_ids, datasets=datasets) logger.info(f"Logging {len(system_data.datasets)} datasets") logger.info(f"Saveing output json to {output_dir}/data.json") @@ -355,9 +328,7 @@ def make_data_json(data_dir: Path, output_dir: Path): datasets.append(dataset) dataset_ids.append(DatasetID(dtag=dtag)) - system_data: SystemData = SystemData( - dataset_ids=dataset_ids, datasets=datasets - ) + system_data: SystemData = SystemData(dataset_ids=dataset_ids, datasets=datasets) logger.info(f"Logging {len(system_data.datasets)} datasets") logger.info(f"Saveing output json to {output_dir}/data.json") diff --git a/src/ligand_neighbourhood_alignment/matching.py b/src/ligand_neighbourhood_alignment/matching.py index d07ba344..bc127078 100644 --- a/src/ligand_neighbourhood_alignment/matching.py +++ b/src/ligand_neighbourhood_alignment/matching.py @@ -21,11 +21,13 @@ def match_atom( return False + from ligand_neighbourhood_alignment import dt + def _match_atom( - canonical_site_atom_id: tuple[str, str,str], - ligand_neighbourhood_atom_id: tuple[str, str,str], + canonical_site_atom_id: tuple[str, str, str], + ligand_neighbourhood_atom_id: tuple[str, str, str], ignore_chain=False, ) -> bool: # id_1 = canonical_site_atom.atom_id @@ -74,9 +76,7 @@ 
def match_neighbourhood_to_site( min_alignable_atoms: int = 5, ) -> bool: # Check if there is an alignable number of atoms shared between the - return match_atoms( - canonical_site.atoms, ligand_neighbourhood.atoms, min_alignable_atoms - ) + return match_atoms(canonical_site.atoms, ligand_neighbourhood.atoms, min_alignable_atoms) def match_neighbourhood_to_sites( @@ -84,9 +84,7 @@ def match_neighbourhood_to_sites( ligand_neighbourhood: LigandNeighbourhood, ) -> int | None: for canonical_site_id, canonical_site in canonical_sites.items(): - match: bool = match_neighbourhood_to_site( - canonical_site, ligand_neighbourhood - ) + match: bool = match_neighbourhood_to_site(canonical_site, ligand_neighbourhood) if match: return match diff --git a/src/ligand_neighbourhood_alignment/save_neighbourhoods.py b/src/ligand_neighbourhood_alignment/save_neighbourhoods.py index c1de6179..621c7e6f 100644 --- a/src/ligand_neighbourhood_alignment/save_neighbourhoods.py +++ b/src/ligand_neighbourhood_alignment/save_neighbourhoods.py @@ -3,8 +3,6 @@ from ligand_neighbourhood_alignment.data import LigandNeighbourhoods -def save_neighbourhoods( - ligand_neighbourhoods: LigandNeighbourhoods, path: Path -): +def save_neighbourhoods(ligand_neighbourhoods: LigandNeighbourhoods, path: Path): with open(path, "w") as f: f.write(ligand_neighbourhoods.json()) diff --git a/src/ligand_neighbourhood_alignment/structures.py b/src/ligand_neighbourhood_alignment/structures.py index 2b2286dd..eaa4f07a 100644 --- a/src/ligand_neighbourhood_alignment/structures.py +++ b/src/ligand_neighbourhood_alignment/structures.py @@ -61,7 +61,8 @@ def get_transform_from_residues(rs: list[ResidueID], srs, ssrs): return sup.transform -def _get_transform_from_residues(rs: list[tuple[str,str]], srs, ssrs): + +def _get_transform_from_residues(rs: list[tuple[str, str]], srs, ssrs): # Transform from ssrs to srs acs = [] for resid in rs: diff --git a/tests/test_boilerplate_removed.py b/tests/test_boilerplate_removed.py index 
b823c53b..a5df9c22 100644 --- a/tests/test_boilerplate_removed.py +++ b/tests/test_boilerplate_removed.py @@ -29,8 +29,7 @@ def test_module_summary(): summary = metadata("python3-pip-skeleton")["summary"] skeleton_check( "One line description of your module" in summary, - "Please change project.description in ./pyproject.toml " - "to be a one line description of your module", + "Please change project.description in ./pyproject.toml " "to be a one line description of your module", ) From 0aa89b9b728c5a45bae647cb0ed4075aa8c61f2c Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:10:14 +0100 Subject: [PATCH 85/90] Remove corrupted constant --- src/ligand_neighbourhood_alignment/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/constants.py b/src/ligand_neighbourhood_alignment/constants.py index 29efdff4..7b0314e7 100644 --- a/src/ligand_neighbourhood_alignment/constants.py +++ b/src/ligand_neighbourhood_alignment/constants.py @@ -42,7 +42,7 @@ FS_MODEL_YAML_FILE_NAME = "fs_model.yaml" ASSEMBLIES_YAML_FILE_NAME = "assemblies.yaml" -XTALFORMS_YAML_FILEĎ€_NAME = "xtalforms.yaml" +XTALFORMS_YAML_FILE_NAME = "xtalforms.yaml" ASSIGNED_XTALFORMS_YAML_FILE_NAME = "assigned_xtalforms.yaml" NEIGHBOURHOODS_YAML_FILE_NAME = "neighbourhoods.yaml" CONNECTED_COMPONENTS_YAML_NAME = "connected_components.yaml" From da5a3761a10895f4a0becfabd941ab1e08716e3f Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:13:54 +0100 Subject: [PATCH 86/90] Remove corrupted constant --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f25ae9d0..dfe86cf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ description = "One line description of your module" dependencies = [ "gemmi", "loguru", - "pydantic", + "pydantic=2.6.0", 
"networkx", "numpy", "rich", From 0f3356f45db1d71d8baf91e3a10f3e16f4b2b2b7 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:16:27 +0100 Subject: [PATCH 87/90] Remove corrupted constant --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dfe86cf6..91fc5cd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ description = "One line description of your module" dependencies = [ "gemmi", "loguru", - "pydantic=2.6.0", + "pydantic==2.6.0", "networkx", "numpy", "rich", From bb3b953d8bd3b7f5f2612f581975151d194ca68d Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 6 May 2024 18:01:48 +0100 Subject: [PATCH 88/90] Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep 
hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site --- .../align_xmaps.py | 2 + .../generate_aligned_structures.py | 285 ++++++++++++++---- 2 files changed, 222 insertions(+), 65 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/align_xmaps.py b/src/ligand_neighbourhood_alignment/align_xmaps.py index 4bb4762c..5f096cdc 100644 --- a/src/ligand_neighbourhood_alignment/align_xmaps.py +++ b/src/ligand_neighbourhood_alignment/align_xmaps.py @@ -509,6 +509,8 @@ def resample_xmap(new_xmap, aligned_res): cell = get_cell(frame_array, step) # print(cell) + print(f'Origin for xmap is now: {frame_lower_bound}') + new_map = get_new_map(cell, frame_array, frame_lower_bound, step) return new_map diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index be28141d..e2754cdc 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -4,6 +4,8 @@ import gemmi import networkx as nx from loguru import logger +import numpy as np +from rich import print as rprint from ligand_neighbourhood_alignment.data import ( # Transform,; AlignableSite,; XtalForms, AssignedXtalForms, @@ -212,15 +214,15 @@ def expand_structure(_structure, xtalforms: AssignedXtalForms, 
moving_ligand_id) def align_structure( - _structure, - moving_ligand_id, - reference_ligand_id, - g, - transforms, - site_transforms: SiteTransforms, - canonical_site_id, - conformer_site_id, - out_path, + _structure, + moving_ligand_id, + reference_ligand_id, + g, + transforms, + site_transforms: SiteTransforms, + canonical_site_id, + conformer_site_id, + out_path, ): shortest_path = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") @@ -262,24 +264,191 @@ def align_structure( from ligand_neighbourhood_alignment import dt +def _mark_atom_pos_to_ni_pos_tup(point, mark, st): + cra = mark.to_cra(st[0]) + ni = st.cell.find_nearest_pbc_position( + point, + cra.atom.pos, + mark.image_idx) + return (ni.x, ni.y, ni.z) + + +def _drop_non_binding_chains_and_symmetrize_waters( + _structure, + neighbourhood, + moving_ligand_id, + xtalform, +): + # Get a copy of structure to edit + new_structure = _structure.clone() + + # Determine which chains have binding residues + neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + + # Determine the assembly each chain is part of + chain_assemblies = { + _chain: _assembly for _assembly_name, _assembly in xtalform.assemblies.items() for _chain in _assembly.chains + } + + # Get the assembly the ligand is modelled as part of + lig_assembly = chain_assemblies[moving_ligand_id[1]] + + # Determine which waters are bound near the ligand, and at what positions + ns = gemmi.NeighborSearch(new_structure[0], new_structure.cell, 10).populate(include_h=False) + + # # Iterate ligand heavy atoms, finding marks and collating + all_marks = {} + atom_multiplicities = {} + for atom in new_structure[0][moving_ligand_id[1]][moving_ligand_id[2]][0]: + point = atom.pos + marks = ns.find_atoms(point, '\0', radius=5) + + # # Get cras + for mark in marks: + cra = mark.to_cra(new_structure[0]) + + if cra.residue.name != 'HOH': + continue + + base_atom_id = (cra.chain.name, 
str(cra.residue.seqid.num), cra.atom.name,) + + # Note first occurence of each atom + if base_atom_id not in atom_multiplicities: + atom_multiplicities[base_atom_id] = 1 + + # Get the atom id with multiplicity + atom_id = base_atom_id + (atom_multiplicities[base_atom_id],) + + pos1 = _mark_atom_pos_to_ni_pos_tup(point, mark, new_structure) + + # + if atom_id in all_marks: + # Get current marks with same base_atom_id + comparator_ids = [(a, b, c, d) for a, b, c, d in all_marks if (a, b, c) == base_atom_id] + + # Check if it is distinct from all of them + poss = [ + _mark_atom_pos_to_ni_pos_tup( + all_marks[comparator_id][0], + all_marks[comparator_id][1], + new_structure + ) + for comparator_id + in comparator_ids + ] + + # If so increase the multiplicity by one and add the atom with the new multiplicity + if not all([np.allclose(pos1, pos2, atol=0.1) for pos2 in poss]): + atom_multiplicities[base_atom_id] += 1 + atom_id = base_atom_id + (atom_multiplicities[base_atom_id],) + + # Otherwise skip + else: + continue + + all_marks[atom_id] = (point, mark, pos1) + + # Update water positions if they are near ligand but modelled elsewhere + local_water_chains = {} + chain_name_to_chain = {_chain.name: _chain for _chain in new_structure[0]} + for atom_id, (point, mark, mark_pos) in all_marks.items(): + # Get the corresponding atom + cra = mark.to_cra(new_structure[0]) + + # if not a water, skip + if cra.residue.name != 'HOH': + continue + + # If a symatom, find the symchain to associate it with + if atom_id[3] > 1: + chain_name = f'{cra.chain.name}{atom_id[3]}' + # # If associated with a new symchain, add it to the structure + if chain_name not in chain_name_to_chain: + chain = gemmi.Chain(chain_name) + new_structure[0].add_chain(chain) + chain_name_to_chain[chain_name] = chain + + # # Otherwise get the knewn chain + # else:else + chain = new_structure[0][chain_name] + + # Add the new residue, and select the relevant atom + residue = cra.residue.clone() + 
chain.add_residue(residue) + residue = new_structure[0][chain_name][atom_id[1]][0] + atom = residue[atom_id[2]][0] + + # Otherwise get the original atom for modification + else: + chain = cra.chain + residue = cra.residue + atom = cra.atom + + # If water update position and note chain + if residue.name == 'HOH': + # Record local water chain and seqid + if chain.name not in local_water_chains: + local_water_chains[chain.name] = [] + local_water_chains[chain.name].append(residue.seqid.num) + + # Update water position from mark + pos = atom.pos + pos.x = mark_pos[0] + pos.y = mark_pos[1] + pos.z = mark_pos[2] + + # Drop residues and non-local waters from non-binding chains containing site waters + new_chains = [] + for _model in new_structure: + for _chain in _model: + + # Create a new chain to hold symmetry waters from non-binding chains + new_chain = gemmi.Chain(_chain.name) + + # Iterate residues in the old chain, adding the local waters + for _residue in _chain: + if _residue.name == 'HOH': + if _chain.name in local_water_chains: + if _residue.seqid.num in local_water_chains[_chain.name]: + new_chain.add_residue(_residue.clone()) + else: + if (_chain.name in lig_assembly.chains) or (_chain.name in neighbourhood_chains): + new_chain.add_residue(_residue.clone()) + + if len(new_chain) > 0: + new_chains.append(new_chain) + + for new_chain in new_chains: + del new_structure[0][new_chain.name] + new_structure[0].add_chain(new_chain) + + return new_structure + def _align_structure( - _structure, - moving_ligand_id: tuple[str, str, str], - reference_ligand_id: tuple[str, str, str], - neighbourhood: dt.Neighbourhood, - g, - neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], - conformer_site_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id: str, - conformer_site_id: str, - xtalform: dt.XtalForm, - out_path: Path, + _structure, + moving_ligand_id: 
tuple[str, str, str], + reference_ligand_id: tuple[str, str, str], + neighbourhood: dt.Neighbourhood, + g, + neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], + conformer_site_transforms: dict[tuple[str, str], dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + conformer_site_id: str, + xtalform: dt.XtalForm, + out_path: Path, ): shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") + # Drop chains without atoms in neighbourhood + reduced_structure = _drop_non_binding_chains_and_symmetrize_waters( + _structure, + neighbourhood, + moving_ligand_id, + xtalform) + previous_ligand_id = moving_ligand_id running_transform = gemmi.Transform() for next_ligand_id in shortest_path: @@ -309,41 +478,27 @@ def _align_structure( logger.debug(f"Transform from native frame to reference frame is: {gemmi_to_transform(running_transform)}") - _structure = superpose_structure(running_transform, _structure) + _structure = superpose_structure(running_transform, reduced_structure) - # Drop chains without atoms in neighbourhood - neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) - chain_assemblies = { - _chain: _assembly for _assembly_name, _assembly in xtalform.assemblies.items() for _chain in _assembly.chains - } - lig_assembly = chain_assemblies[moving_ligand_id[1]] - for _model in _structure: - for _chain in _model: - - if (_chain.name not in lig_assembly.chains) & ( - _chain.name not in neighbourhood_chains - ): # Remove any chain the ligand isn't modelled onto - - _model.remove_chain(_chain.name) # Write the fully aligned structure _structure.write_pdb(str(out_path)) def _align_reference_structure( - _structure, - dtag: str, - # moving_ligand_id: tuple[str,str,str], - # reference_ligand_id: tuple[str,str,str], - # g, - # neighbourhood_transforms: 
dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], - # conformer_site_transforms: dict[tuple[str, str], dt.Transform], - reference_structure_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id: str, - # conformer_site_id: str, - out_path: Path, + _structure, + dtag: str, + # moving_ligand_id: tuple[str,str,str], + # reference_ligand_id: tuple[str,str,str], + # g, + # neighbourhood_transforms: dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], + # conformer_site_transforms: dict[tuple[str, str], dt.Transform], + reference_structure_transforms: dict[tuple[str, str], dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + # conformer_site_id: str, + out_path: Path, ): running_transform = transform_to_gemmi(reference_structure_transforms[(dtag, canonical_site_id)]) @@ -353,10 +508,10 @@ def _align_reference_structure( logger.debug(f"Transform from native frame to reference frame is: {gemmi_to_transform(running_transform)}") - _structure = superpose_structure(running_transform, _structure) + new_structure = superpose_structure(running_transform, _structure) # Write the fully aligned structure - _structure.write_pdb(str(out_path)) + new_structure.write_pdb(str(out_path)) # def align_artefacts(): @@ -381,17 +536,17 @@ def _align_reference_structure( def _align_structures_from_sites( - structures, - canonical_sites: CanonicalSites, - conformer_sites: ConformerSites, - transforms: Transforms, - neighbourhoods: LigandNeighbourhoods, - xtalforms: XtalForms, - assigned_xtalforms: AssignedXtalForms, - g, - site_transforms: SiteTransforms, - # _output_dir: Path, - output: Output, + structures, + canonical_sites: CanonicalSites, + conformer_sites: ConformerSites, + transforms: Transforms, + neighbourhoods: LigandNeighbourhoods, + xtalforms: XtalForms, + assigned_xtalforms: AssignedXtalForms, + g, + site_transforms: SiteTransforms, + # 
_output_dir: Path, + output: Output, ): # asd = _output_dir / "aligned" # if not asd.exists(): @@ -433,10 +588,10 @@ def _align_structures_from_sites( # Get output path aod = Path(output.source_dir) output_path = ( - aod - / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ - moving_ligand_id.residue - ].aligned_structures[canonical_site_id] + aod + / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ + moving_ligand_id.residue + ].aligned_structures[canonical_site_id] ) # Align the ligand align_structure( From 576b90d704268b2fcaea97fd8ba8715bbd3b88c3 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 6 May 2024 18:01:48 +0100 Subject: [PATCH 89/90] Fix to make multiplicity increase dependent on all atoms being close rather than any Fix to make multiplicity increase dependent on all atoms being close rather than any Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site 
Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site --- .../align_xmaps.py | 2 + .../generate_aligned_structures.py | 285 ++++++++++++++---- 2 files changed, 222 insertions(+), 65 deletions(-) diff --git a/src/ligand_neighbourhood_alignment/align_xmaps.py b/src/ligand_neighbourhood_alignment/align_xmaps.py index 4bb4762c..5f096cdc 100644 --- a/src/ligand_neighbourhood_alignment/align_xmaps.py +++ b/src/ligand_neighbourhood_alignment/align_xmaps.py @@ -509,6 +509,8 @@ def resample_xmap(new_xmap, aligned_res): cell = get_cell(frame_array, step) # print(cell) + print(f'Origin for xmap is now: {frame_lower_bound}') + new_map = get_new_map(cell, frame_array, frame_lower_bound, step) return new_map diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index be28141d..8dafe762 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -4,6 +4,8 @@ import gemmi import networkx as nx from loguru import logger +import numpy as np +from rich import print as rprint from ligand_neighbourhood_alignment.data import ( 
# Transform,; AlignableSite,; XtalForms, AssignedXtalForms, @@ -212,15 +214,15 @@ def expand_structure(_structure, xtalforms: AssignedXtalForms, moving_ligand_id) def align_structure( - _structure, - moving_ligand_id, - reference_ligand_id, - g, - transforms, - site_transforms: SiteTransforms, - canonical_site_id, - conformer_site_id, - out_path, + _structure, + moving_ligand_id, + reference_ligand_id, + g, + transforms, + site_transforms: SiteTransforms, + canonical_site_id, + conformer_site_id, + out_path, ): shortest_path = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") @@ -262,24 +264,191 @@ def align_structure( from ligand_neighbourhood_alignment import dt +def _mark_atom_pos_to_ni_pos_tup(point, mark, st): + cra = mark.to_cra(st[0]) + ni = st.cell.find_nearest_pbc_position( + point, + cra.atom.pos, + mark.image_idx) + return (ni.x, ni.y, ni.z) + + +def _drop_non_binding_chains_and_symmetrize_waters( + _structure, + neighbourhood, + moving_ligand_id, + xtalform, +): + # Get a copy of structure to edit + new_structure = _structure.clone() + + # Determine which chains have binding residues + neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) + + # Determine the assembly each chain is part of + chain_assemblies = { + _chain: _assembly for _assembly_name, _assembly in xtalform.assemblies.items() for _chain in _assembly.chains + } + + # Get the assembly the ligand is modelled as part of + lig_assembly = chain_assemblies[moving_ligand_id[1]] + + # Determine which waters are bound near the ligand, and at what positions + ns = gemmi.NeighborSearch(new_structure[0], new_structure.cell, 10).populate(include_h=False) + + # # Iterate ligand heavy atoms, finding marks and collating + all_marks = {} + atom_multiplicities = {} + for atom in new_structure[0][moving_ligand_id[1]][moving_ligand_id[2]][0]: + point = atom.pos + marks = ns.find_atoms(point, '\0', radius=5) + + # # Get cras + for 
mark in marks: + cra = mark.to_cra(new_structure[0]) + + if cra.residue.name != 'HOH': + continue + + base_atom_id = (cra.chain.name, str(cra.residue.seqid.num), cra.atom.name,) + + # Note first occurence of each atom + if base_atom_id not in atom_multiplicities: + atom_multiplicities[base_atom_id] = 1 + + # Get the atom id with multiplicity + atom_id = base_atom_id + (atom_multiplicities[base_atom_id],) + + pos1 = _mark_atom_pos_to_ni_pos_tup(point, mark, new_structure) + + # + if atom_id in all_marks: + # Get current marks with same base_atom_id + comparator_ids = [(a, b, c, d) for a, b, c, d in all_marks if (a, b, c) == base_atom_id] + + # Check if it is distinct from all of them + poss = [ + _mark_atom_pos_to_ni_pos_tup( + all_marks[comparator_id][0], + all_marks[comparator_id][1], + new_structure + ) + for comparator_id + in comparator_ids + ] + + # If so increase the multiplicity by one and add the atom with the new multiplicity + if not any([np.allclose(pos1, pos2, atol=0.1) for pos2 in poss]): + atom_multiplicities[base_atom_id] += 1 + atom_id = base_atom_id + (atom_multiplicities[base_atom_id],) + + # Otherwise skip + else: + continue + + all_marks[atom_id] = (point, mark, pos1) + + # Update water positions if they are near ligand but modelled elsewhere + local_water_chains = {} + chain_name_to_chain = {_chain.name: _chain for _chain in new_structure[0]} + for atom_id, (point, mark, mark_pos) in all_marks.items(): + # Get the corresponding atom + cra = mark.to_cra(new_structure[0]) + + # if not a water, skip + if cra.residue.name != 'HOH': + continue + + # If a symatom, find the symchain to associate it with + if atom_id[3] > 1: + chain_name = f'{cra.chain.name}{atom_id[3]}' + # # If associated with a new symchain, add it to the structure + if chain_name not in chain_name_to_chain: + chain = gemmi.Chain(chain_name) + new_structure[0].add_chain(chain) + chain_name_to_chain[chain_name] = chain + + # # Otherwise get the knewn chain + # else:else + chain = 
new_structure[0][chain_name] + + # Add the new residue, and select the relevant atom + residue = cra.residue.clone() + chain.add_residue(residue) + residue = new_structure[0][chain_name][atom_id[1]][0] + atom = residue[atom_id[2]][0] + + # Otherwise get the original atom for modification + else: + chain = cra.chain + residue = cra.residue + atom = cra.atom + + # If water update position and note chain + if residue.name == 'HOH': + # Record local water chain and seqid + if chain.name not in local_water_chains: + local_water_chains[chain.name] = [] + local_water_chains[chain.name].append(residue.seqid.num) + + # Update water position from mark + pos = atom.pos + pos.x = mark_pos[0] + pos.y = mark_pos[1] + pos.z = mark_pos[2] + + # Drop residues and non-local waters from non-binding chains containing site waters + new_chains = [] + for _model in new_structure: + for _chain in _model: + + # Create a new chain to hold symmetry waters from non-binding chains + new_chain = gemmi.Chain(_chain.name) + + # Iterate residues in the old chain, adding the local waters + for _residue in _chain: + if _residue.name == 'HOH': + if _chain.name in local_water_chains: + if _residue.seqid.num in local_water_chains[_chain.name]: + new_chain.add_residue(_residue.clone()) + else: + if (_chain.name in lig_assembly.chains) or (_chain.name in neighbourhood_chains): + new_chain.add_residue(_residue.clone()) + + if len(new_chain) > 0: + new_chains.append(new_chain) + + for new_chain in new_chains: + del new_structure[0][new_chain.name] + new_structure[0].add_chain(new_chain) + + return new_structure + def _align_structure( - _structure, - moving_ligand_id: tuple[str, str, str], - reference_ligand_id: tuple[str, str, str], - neighbourhood: dt.Neighbourhood, - g, - neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], - conformer_site_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - 
canonical_site_id: str, - conformer_site_id: str, - xtalform: dt.XtalForm, - out_path: Path, + _structure, + moving_ligand_id: tuple[str, str, str], + reference_ligand_id: tuple[str, str, str], + neighbourhood: dt.Neighbourhood, + g, + neighbourhood_transforms: dict[tuple[tuple[str, str, str], tuple[str, str, str]], dt.Transform], + conformer_site_transforms: dict[tuple[str, str], dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + conformer_site_id: str, + xtalform: dt.XtalForm, + out_path: Path, ): shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") + # Drop chains without atoms in neighbourhood + reduced_structure = _drop_non_binding_chains_and_symmetrize_waters( + _structure, + neighbourhood, + moving_ligand_id, + xtalform) + previous_ligand_id = moving_ligand_id running_transform = gemmi.Transform() for next_ligand_id in shortest_path: @@ -309,41 +478,27 @@ def _align_structure( logger.debug(f"Transform from native frame to reference frame is: {gemmi_to_transform(running_transform)}") - _structure = superpose_structure(running_transform, _structure) + _structure = superpose_structure(running_transform, reduced_structure) - # Drop chains without atoms in neighbourhood - neighbourhood_chains = set([_atom_id[0] for _atom_id in neighbourhood.atoms]) - chain_assemblies = { - _chain: _assembly for _assembly_name, _assembly in xtalform.assemblies.items() for _chain in _assembly.chains - } - lig_assembly = chain_assemblies[moving_ligand_id[1]] - for _model in _structure: - for _chain in _model: - - if (_chain.name not in lig_assembly.chains) & ( - _chain.name not in neighbourhood_chains - ): # Remove any chain the ligand isn't modelled onto - - _model.remove_chain(_chain.name) # Write the fully aligned structure _structure.write_pdb(str(out_path)) def _align_reference_structure( - _structure, - dtag: str, - # 
moving_ligand_id: tuple[str,str,str], - # reference_ligand_id: tuple[str,str,str], - # g, - # neighbourhood_transforms: dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], - # conformer_site_transforms: dict[tuple[str, str], dt.Transform], - reference_structure_transforms: dict[tuple[str, str], dt.Transform], - # canonical_site_transforms: dict[str, dt.Transform], - canonical_site_id: str, - # conformer_site_id: str, - out_path: Path, + _structure, + dtag: str, + # moving_ligand_id: tuple[str,str,str], + # reference_ligand_id: tuple[str,str,str], + # g, + # neighbourhood_transforms: dict[tuple[tuple[str,str,str], tuple[str,str,str]], dt.Transform], + # conformer_site_transforms: dict[tuple[str, str], dt.Transform], + reference_structure_transforms: dict[tuple[str, str], dt.Transform], + # canonical_site_transforms: dict[str, dt.Transform], + canonical_site_id: str, + # conformer_site_id: str, + out_path: Path, ): running_transform = transform_to_gemmi(reference_structure_transforms[(dtag, canonical_site_id)]) @@ -353,10 +508,10 @@ def _align_reference_structure( logger.debug(f"Transform from native frame to reference frame is: {gemmi_to_transform(running_transform)}") - _structure = superpose_structure(running_transform, _structure) + new_structure = superpose_structure(running_transform, _structure) # Write the fully aligned structure - _structure.write_pdb(str(out_path)) + new_structure.write_pdb(str(out_path)) # def align_artefacts(): @@ -381,17 +536,17 @@ def _align_reference_structure( def _align_structures_from_sites( - structures, - canonical_sites: CanonicalSites, - conformer_sites: ConformerSites, - transforms: Transforms, - neighbourhoods: LigandNeighbourhoods, - xtalforms: XtalForms, - assigned_xtalforms: AssignedXtalForms, - g, - site_transforms: SiteTransforms, - # _output_dir: Path, - output: Output, + structures, + canonical_sites: CanonicalSites, + conformer_sites: ConformerSites, + transforms: Transforms, + neighbourhoods: 
LigandNeighbourhoods, + xtalforms: XtalForms, + assigned_xtalforms: AssignedXtalForms, + g, + site_transforms: SiteTransforms, + # _output_dir: Path, + output: Output, ): # asd = _output_dir / "aligned" # if not asd.exists(): @@ -433,10 +588,10 @@ def _align_structures_from_sites( # Get output path aod = Path(output.source_dir) output_path = ( - aod - / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ - moving_ligand_id.residue - ].aligned_structures[canonical_site_id] + aod + / output.dataset_output[moving_ligand_id.dtag][moving_ligand_id.chain][ + moving_ligand_id.residue + ].aligned_structures[canonical_site_id] ) # Align the ligand align_structure( From 9c4e08e4de164471b619759cfc335bcc84464129 Mon Sep 17 00:00:00 2001 From: ConorFWild <41680328+ConorFWild@users.noreply.github.com> Date: Mon, 6 May 2024 18:01:48 +0100 Subject: [PATCH 90/90] Fix to make multiplicity increase dependent on all atoms being close rather than any Fix to make multiplicity increase dependent on all atoms being close rather than any Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep 
hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site Fix to keep hold of waters in the binding site --- .../generate_aligned_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index e2754cdc..8dafe762 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -338,7 +338,7 @@ def _drop_non_binding_chains_and_symmetrize_waters( ] # If so increase the multiplicity by one and add the atom with the new multiplicity - if not all([np.allclose(pos1, pos2, atol=0.1) for pos2 in poss]): + if not any([np.allclose(pos1, pos2, atol=0.1) for pos2 in poss]): atom_multiplicities[base_atom_id] += 1 atom_id = base_atom_id + (atom_multiplicities[base_atom_id],)