Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Classifying geometry based on RMSD #213

Merged
merged 13 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions molSimplify/Classes/globalvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import subprocess
from typing import Dict, Tuple
from molSimplify.utils.metaclasses import Singleton
import numpy as np

# Dictionary containing atomic mass, atomic number, covalent radius, num valence electrons
# Data from http://www.webelements.com/ (last accessed May 13th 2015)
Expand Down Expand Up @@ -463,6 +464,28 @@
[[70.5, 70.5, 67.7, 67.7, 120, 120, 135.5, 135.5]]
}

#need each point to be distance 1 from the origin
deg_to_rad = 2*np.pi / 360
all_polyhedra = {
"linear": np.array([(1, 0, 0), (-1, 0, 0)]),
"bent": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0)]),
"trigonal planar": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"T shape": np.array([(1, 0, 0), (np.cos(90*deg_to_rad), np.sin(90*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0)]),
"trigonal pyramidal": np.array([(1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"tetrahedral": np.array([(1, 1, 1), (1, -1, -1), (-1, 1, -1), (-1, -1, 1)])/np.sqrt(3),
"square planar": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0)]),
"seesaw": np.array([(1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"trigonal bipyramidal": np.array([(0, 0, 1), (0, 0, -1), (1, 0, 0), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 0), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 0)]),
"square pyramidal": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1)]),
"pentagonal planar": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0)]),
"octahedral": np.array([(1, 0, 0), (0, 1, 0), (-1, 0, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]),
"pentagonal pyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1)]),
"trigonal prismatic": np.array([(1, 0, 1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1), (1, 0, -1), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)])/np.sqrt(2),
"pentagonal bipyramidal": np.array([(1, 0, 0), (np.cos(72*deg_to_rad), np.sin(72*deg_to_rad), 0), (np.cos(144*deg_to_rad), np.sin(144*deg_to_rad), 0), (np.cos(216*deg_to_rad), np.sin(216*deg_to_rad), 0), (np.cos(288*deg_to_rad), np.sin(288*deg_to_rad), 0), (0, 0, 1), (0, 0, -1)]),
"square antiprismatic": np.array([(1, 0, 1), (0, 1, 1), (-1, 0, 1), (0, -1, 1), (np.cos(45*deg_to_rad), np.sin(45*deg_to_rad), -1), (np.cos(135*deg_to_rad), np.sin(135*deg_to_rad), -1), (np.cos(225*deg_to_rad), np.sin(225*deg_to_rad), -1), (np.cos(315*deg_to_rad), np.sin(315*deg_to_rad), -1)])/np.sqrt(2),
"tricapped trigonal prismatic": np.array([(1, 0, 1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), 1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), 1)/np.sqrt(2), (1, 0, -1)/np.sqrt(2), (np.cos(120*deg_to_rad), np.sin(120*deg_to_rad), -1)/np.sqrt(2), (np.cos(240*deg_to_rad), np.sin(240*deg_to_rad), -1)/np.sqrt(2), (np.cos(60*deg_to_rad), np.sin(60*deg_to_rad), 0), (np.cos(180*deg_to_rad), np.sin(180*deg_to_rad), 0), (np.cos(300*deg_to_rad), np.sin(300*deg_to_rad), 0)])
}

# Module for running bash commands
# @param cmd String containing command to be run
# @return bash output string
Expand Down Expand Up @@ -720,6 +743,16 @@ def get_all_angle_refs(self):
"""
return all_angle_refs

def get_all_polyhedra(self):
"""Get reference polyhedra dict.

Returns
-------
all_polyhedra : dict
Reference polyhedra for various geometries.
"""
return all_polyhedra

def add_custom_path(self, path):
"""Record custom path in ~/.molSimplify file

Expand Down
183 changes: 179 additions & 4 deletions molSimplify/Classes/mol3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from molSimplify.Scripts.geometry import (distance, connectivity_match,
vecangle, rotation_params,
rotate_around_axis)
from molSimplify.Scripts.rmsd import rigorous_rmsd
from molSimplify.Scripts.rmsd import rigorous_rmsd, kabsch_rmsd, kabsch_rotate

try:
import PyQt5 # noqa: F401
Expand Down Expand Up @@ -1979,7 +1979,7 @@
'Error, mol3D could not understand connectivity in mol')
return nats

def getBondedAtomsSmart(self, idx, oct=True, strict_cutoff=False, catom_list=None):
def getBondedAtomsSmart(self, idx, oct=False, strict_cutoff=False, catom_list=None):
"""
Get the atoms bonded with the atom specified with the given index, using the molecular graph.
Creates graph if it does not exist.
Expand Down Expand Up @@ -4938,8 +4938,8 @@
self.get_num_coord_metal(debug=False, strict_cutoff=strict_cutoff, catom_list=catom_list)
catoms = self.catoms
# print(catoms, [self.getAtom(x).symbol() for x in catoms])
if len(catoms) > 6:
_, catoms = self.oct_comp(debug=False)
# if len(catoms) > 6:
# _, catoms = self.oct_comp(debug=False)
Fixed Show fixed Hide fixed
fcs = [metalind] + catoms
return fcs

Expand Down Expand Up @@ -5683,6 +5683,181 @@
}
return results

def get_geometry_type_distance(self, max_dev=1e6, close_dev=1e-2,
flag_catoms=False, catoms_arr=None,
skip=False, transition_metals_only=False):
"""
Get the type of the geometry (available options in globalvars all_geometries).

uses hapticity truncated first coordination shell.
Does not require the input of num_coord.

Parameters
----------
max_dev : float, optional
Maximum RMSD allowed between a structure and an ideal geometry before it is classified as unknown. Default is 1e6.
close_dev : float, optional
Maximum difference in RMSD between two classifications allowed before they are compared by maximum single-atom deviation as well.
flag_catoms : bool, optional
Whether or not to return the catoms arr. Default as False.
catoms_arr : Nonetype, optional
Uses the catoms of the mol3D by default. User and overwrite this connection atom array by explicit input.
Default is Nonetype.
skip : list, optional
Geometry checks to skip. Default is False.
transition_metals_only : bool, optional
Flag for considering more than just transition metals as metals. Default is False.

Returns
-------
results : dictionary
Contains the classified geometry and the RMSD from an ideal structure.
Summary contains a list of the RMSD and the maximum single-atom deviation for all considered geometry types.

"""

first_shell, hapt = self.get_first_shell()
num_coord = first_shell.natoms - 1
all_geometries = globalvars().get_all_geometries()
all_polyhedra = globalvars().get_all_polyhedra()
summary = {}

if len(first_shell.graph): # Find num_coord based on metal_cn if graph is assigned
if len(first_shell.findMetal()) > 1:
raise ValueError('Multimetal complexes are not yet handled.')
elif len(first_shell.findMetal(transition_metals_only=transition_metals_only)) == 1:
num_coord = len(first_shell.getBondedAtomsSmart(first_shell.findMetal(transition_metals_only=transition_metals_only)[0]))
else:
raise ValueError('No metal centers exist in this complex.')
if catoms_arr is not None and len(catoms_arr) != num_coord:
raise ValueError("num_coord and the length of catoms_arr do not match.")

if num_coord not in list(all_geometries.keys()):
#should we indicate somehow that these are unknown due to a different coordination number?
results = {
"geometry": "unknown",
"rmsd": False,
"summary": {},
"hapticity": hapt,
}
return results

possible_geometries = all_geometries[num_coord]

#for each same-coordinated geometry, get the minimum RMSD and the maximum single-atom deviation in that pairing
for geotype in possible_geometries:
rmsd_calc, max_dist = self.dev_from_ideal_geometry(all_polyhedra[geotype])
summary.update({geotype: [rmsd_calc, max_dist]})

close_rmsds = False
current_rmsd, geometry = max_dev, "unknown"
for geotype in summary:
#if the RMSD for this structure is the lowest seen so far (within a threshold)
if summary[geotype][0] < (current_rmsd + close_dev):
#if the RMSDs are close, flag this in the summary and classify on second criterion
if np.abs(summary[geotype][0] - current_rmsd) < close_dev:
close_rmsds = True
if summary[geotype][1] < summary[geometry][1]:
#classify based on largest singular deviation
current_rmsd = summary[geotype][0]
geometry = geotype
else:
current_rmsd = summary[geotype][0]
geometry = geotype

results = {
"geometry": geometry,
"rmsd": current_rmsd,
"summary": summary,
"hapticity": hapt,
"close_rmsds": close_rmsds
}
return results

def dev_from_ideal_geometry(self, ideal_polyhedron):
"""
Return the minimum RMSD between a geometry and an ideal polyhedron (with the same average bond distances).
Enumerates all possible indexing of the geometry. As such, only recommended for small systems.

Parameters
----------
ideal_polyhedron: np.array of 3-tuples of coordinates
Reference list of points for an ideal geometry

Returns
-------
rmsd: float
Minimum root mean square distance between the fed geometry and the ideal polyhedron
single_dev: float
Maximum distance between any paired points in the fed geometry and the ideal polyhedron.
"""

metal_idx = self.findMetal()
if len(metal_idx) == 0:
raise ValueError('No metal centers exist in this complex.')
elif len(metal_idx) != 1:
raise ValueError('Multimetal complexes are not yet handled.')
temp_mol = self.get_first_shell()[0]
fcs_indices = temp_mol.get_fcs()
# remove metal index from first coordination shell
fcs_indices.remove(temp_mol.findMetal()[0])

if len(fcs_indices) != len(ideal_polyhedron):
raise ValueError('The coordination number differs between the two provided structures.')

#have to redo getting metal_idx with the new mol after running get_first_shell
#want to work with temp_mol since it has the edge and sandwich logic implemented to replace those with centroids
metal_atom = temp_mol.getAtoms()[temp_mol.findMetal()[0]]
fcs_atoms = [temp_mol.getAtoms()[i] for i in fcs_indices]
#construct a np array of the non-metal atoms in the FCS
distances = []
positions = np.zeros([len(fcs_indices), 3])
for idx, atom in enumerate(fcs_atoms):
distance = atom.distance(metal_atom)
distances.append(distance)
positions[idx, :] = np.array(atom.coords()) - np.array(metal_atom.coords()) #shift so the metal is at (0, 0, 0)

def permutations(list):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

testing this function, it seems that it could be replaced by itertools.permutations()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which I have done in my recent changes

Copy link
Contributor Author

@aarongarrison aarongarrison May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that get_first_shell is limited to a maximum coordination number of 6: it uses the function obtain_truncation_metal to get the first shell (before applying any edge/sandwich logic), which determines neighbors using the getBondedAtomsSmart function. This function, as shown in commit 79272c1, assumes a maximum coordination number of 6. If the default behavior of getBondedAtomsSmart is set to not assume a maximum of 6-coordinated structures, some of the tests will fail for geometry checks, so I do not want to change that behavior. However, changing the oct argument to False will allow for get_geometry_type_distance to classify geometries with coordination numbers higher than 6, which I have tested. I can introduce a workaround variable to allow for different behavior (such as in commit 46abd16), but I do not want to add that argument to more functions than needed (since readability on those extra arguments would be difficult to interpret). I would argue that we should change the test cases to add oct=True to getBondedAtomsSmart (since that function should not assume a maximum coordination number), but I am not sure if that would also require adding arguments to many functions to ensure that the test cases work as intended.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just spent some time trying to understand why these 4 test cases:

  • test_geocheck_oct[broken_ligands]
  • test_geocheck_oct[H_transfer]
  • test_geocheck_oct[rotational_group]
  • test_geocheck_oct[iodine_sulfur]

fail when we set oct=False as default in getBondedAtomsSmart and got nowhere... My recommendation for now is just to merge what you have and address this issue during the hackathon on higher coordination complexes.

'Returns all possible permutations of a list.'
if len(list) == 0:
return []
elif len(list) == 1:
return [list]
l = []
for i in range(len(list)):
m = list[i]
remaining = list[:i] + list[i+1:]
for p in permutations(remaining):
l.append([m] + p)
return l

current_min = np.inf
orders = permutations(list(range(len(ideal_polyhedron))))
max_dist = 0

#if desired, make it so the ideal polyhedron has same average bond distance as the mol
#scaled_polyhedron = ideal_polyhedron * np.mean(np.array(distances))

#for all possible assignments, find RMSD between ideal and actual structure
ideal_positions = np.zeros([len(fcs_indices), 3])
for order in orders:
for i in range(len(order)):
#if you wanted to use the same average bond length for all, use the following
#ideal_positions[i, :] = scaled_polyhedron[order[i]]
#if you want to let each ligand scale its length independently, uncomment the following
ideal_positions[i, :] = ideal_polyhedron[order[i]] * distances[i]
rmsd_calc = kabsch_rmsd(ideal_positions, positions)
if rmsd_calc < current_min:
current_min = rmsd_calc
#calculate and store the maximum pairwise distance
rot_ideal = kabsch_rotate(ideal_positions, positions)
diff_matrix = rot_ideal - positions
pairwise_dists = np.sum(diff_matrix**2, axis=1)
max_dist = np.max(pairwise_dists)

#return minimum RMSD, maximum pairwise distance in that structure
return current_min, max_dist

def get_features(self, lac=True, force_generate=False, eq_sym=False,
use_dist=False, NumB=False, Gval=False, size_normalize=False,
alleq=False, strict_cutoff=False, catom_list=None, MRdiag_dict={}, depth=3):
Expand Down
Loading