Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for multiple forcefields #2

Merged
merged 9 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ ignored-classes=optparse.Values,thread._local,_thread._local
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
ignored-modules=openmm

# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
Expand Down
19 changes: 19 additions & 0 deletions pymatgen/io/openmm/tests/test_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,22 @@ def test_get_input_set_w_charges(self):
"state.xml",
}
assert input_set.validate()

def test_get_input_set_w_charges_and_forcefields(self):
Comment on lines +76 to +77
Copy link
Owner

@orionarcher orionarcher Feb 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should add a more diverse set of tests and test more of the possible forcefields. This would be a great case for pytest parameterize, which would let you test a long list of forcefield inputs with almost complete code re-use.

I also think we should make sure that this is actually doing what we expect. Currently, we are just testing to make sure that it doesn't fail, but there is no confirmation that the Systems are actually parameterized differently.

We could test parameterization by instantiating a system using input_set['system.xml'].get_system() and then using the System.getForces() iterator to loop through the forces.

Even just looping through all the forces and confirming that they are different for different force fields is enough. I'd recommend testing this with just a lone water molecule and with a lone ethanol (or any organic). That'll keep the resulting Forces arrays small. Just generating the Forces and then using those values as a test would be sufficient, no need to look up in the Sage or GAFF docs what the values are supposed to be.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great, this will take me a bit longer to get to but at the very least will have it done by next week

pf6_charge_array = np.load(PF6_charges)
li_charge_array = np.load(Li_charges)
generator = OpenMMSolutionGen(
partial_charges=[(PF6_xyz, pf6_charge_array), (Li_xyz, li_charge_array)],
partial_charge_scaling={"Li": 0.9, "PF6": 0.9},
packmol_random_seed=1,
force_field={"O": "spce"},
)
input_set = generator.get_input_set({"O": 200, "CCO": 20, "F[P-](F)(F)(F)(F)F": 10, "[Li+]": 10}, density=1)
assert isinstance(input_set, OpenMMSet)
assert set(input_set.inputs.keys()) == {
"topology.pdb",
"system.xml",
"integrator.xml",
"state.xml",
}
assert input_set.validate()
241 changes: 144 additions & 97 deletions pymatgen/io/openmm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from openff.toolkit.typing.engines import smirnoff
from openff.toolkit.typing.engines.smirnoff.parameters import LibraryChargeHandler
import openmm
from openmm.unit import elementary_charge, angstrom
from openmm.unit import elementary_charge
from openmm.app import Topology
from openmm.app import ForceField as omm_ForceField
from openmm.app.forcefield import PME
Expand Down Expand Up @@ -299,7 +299,7 @@ def assign_charges_to_mols(
This will modify the original force field, not make a copy.

Args:
forcefield: force field that will have partial charges added.
smile_strings: A list of SMILEs strings
partial_charge_scaling: A dictionary of partial charge scaling for particular species. Keys
are SMILEs and values are the scaling factor.
partial_charges: A list of tuples, where the first element of each tuple is a molecular
Expand All @@ -308,7 +308,7 @@ def assign_charges_to_mols(
same atom ordering.

Returns:
forcefield with partial charges added.
List of charged openff Molecules
"""
# loop through partial charges to add to force field
matched_mols = set()
Expand Down Expand Up @@ -345,6 +345,39 @@ def assign_charges_to_mols(
return charged_mols


def assign_small_molecule_ff(molecules: List[openff.toolkit.topology.Molecule], forcefield_name: str):
orionarcher marked this conversation as resolved.
Show resolved Hide resolved
"""

Parameters
----------
molecules
forcefield_name

Returns
OpenMM Template
-------

"""
smirnoff_ff_names = SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS
gaff_ff_names = GAFFTemplateGenerator.INSTALLED_FORCEFIELDS
template = None
if forcefield_name == "sage" or forcefield_name in smirnoff_ff_names:
ff_name = "openff-2.0.0" if forcefield_name == "sage" else forcefield_name
template = SMIRNOFFTemplateGenerator(molecules=molecules, forcefield=ff_name)

elif forcefield_name == "gaff" or forcefield_name in gaff_ff_names:
ff_name = "gaff-2.11" if forcefield_name == "gaff" else forcefield_name
template = GAFFTemplateGenerator(molecules=molecules, forcefield=ff_name)
else:
raise NotImplementedError(
f"{forcefield_name} is not supported."
f"currently only these force fields are supported:"
f" {' '.join(smirnoff_ff_names+gaff_ff_names)}.\n"
f"Please select one of the supported force fields."
)
return template


def parameterize_system(
topology: Topology,
smile_strings: List[str],
Expand All @@ -360,111 +393,125 @@ def parameterize_system(
Args:
topology: an OpenMM topology.
smile_strings: a list of SMILEs representing each molecule in the system.
box: list of [xlo, ylo, zlo, xhi, yhi, zhi].
force_field: name of the force field. Currently only Sage is supported.
partial_charge_method:
partial_charge_scaling:
partial_charges:
box: list of [xlo, ylo, zlo, xhi, yhi, zhi] in nanometers.
force_field: Name of the force field or dict of forcefields for each
small molecule, e.g. {"O": "spce"}. Small molecule
forcefields and water models can either be provided
informally, i.e. "gaff" or "sage" for small molecules,
or "spce", "tip3p", or "tip4p" for water, or can be
formally defined with OpenMM filenames,
e.g. "charmm36/water.xml". Large molecule forcefields
must be specified with the full path,
e.g. "amber14/protein.ff14SB.xml".
partial_charge_method: Method for OpenFF partial charge assignment
for small molecules without charges provided
in partial_charges
partial_charge_scaling: Scaling for partial charges, as a dict
of {str: float, . . .}, e.g. {"[Li+]": 0.8}
partial_charges: List of tuples of (molecule, charges).
The Molecule can be a Pymatgen Molecule or the
path to a structure file that can be parsed by
Pymatgen. Charges is a numpy array with length equal
to the number of sites in the molecule.

Returns:
an OpenMM.system
"""

partial_charge_scaling = partial_charge_scaling if partial_charge_scaling else {}
partial_charges = partial_charges if partial_charges else []
supported_force_fields = ["Sage"]
basic_water_ffs = ["tip3p", "spce", "tip4p"]
basic_small_ffs = ["gaff", "sage"]

all_small_ffs = SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS + GAFFTemplateGenerator.INSTALLED_FORCEFIELDS

forcefield_omm = omm_ForceField()
if isinstance(force_field, str):
if force_field.lower() == "sage":
openff_forcefield = smirnoff.ForceField("openff_unconstrained-2.0.0.offxml")
charged_openff_mols = assign_charges_to_mols(
smile_strings,
partial_charge_method,
partial_charge_scaling,
partial_charges,
)
openff_topology = openff.toolkit.topology.Topology.from_openmm(topology, charged_openff_mols)
box_vectors = list(np.array(box[3:6]) - np.array(box[0:3])) * angstrom
openff_topology.box_vectors = box_vectors
system = openff_forcefield.create_openmm_system(
openff_topology,
charge_from_molecules=charged_openff_mols,
allow_nonintegral_charges=True,
)
return system
ff_name = force_field.lower()
charged_off_mols = assign_charges_to_mols(
smile_strings=smile_strings,
partial_charges=partial_charges,
partial_charge_scaling=partial_charge_scaling,
partial_charge_method=partial_charge_method,
)
orionarcher marked this conversation as resolved.
Show resolved Hide resolved
template = assign_small_molecule_ff(molecules=charged_off_mols, forcefield_name=ff_name)
forcefield_omm.registerTemplateGenerator(template.generator)

else:
# TODO: Make decisions for user about ff name
# TODO: Dict instead of list of tuples
# TODO: Make periodic
# TODO: figure out how to add partial charges
small_ffs = [
"smirnoff99Frosst-1.0.2",
"smirnoff99Frosst-1.0.0",
"smirnoff99Frosst-1.1.0",
"smirnoff99Frosst-1.0.4",
"smirnoff99Frosst-1.0.8",
"smirnoff99Frosst-1.0.6",
"smirnoff99Frosst-1.0.3",
"smirnoff99Frosst-1.0.1",
"smirnoff99Frosst-1.0.5",
"smirnoff99Frosst-1.0.9",
"smirnoff99Frosst-1.0.7",
"openff-1.0.1",
"openff-1.1.1",
"openff-1.0.0-RC1",
"openff-1.2.0",
"openff-1.3.0",
"openff-2.0.0-rc.2",
"openff-2.0.0",
"openff-1.1.0",
"openff-1.0.0",
"openff-1.0.0-RC2",
"openff-1.3.1",
"openff-1.2.1",
"openff-1.3.1-alpha.1",
"openff-2.0.0-rc.1",
"gaff-1.4",
"gaff-1.8",
"gaff-1.81",
"gaff-2.1",
"gaff-2.11",
]
small_molecules = {}
large_or_water = {}
# iterate through each smile, if no forcefielded provided use Sage
# iterate through each molecule and forcefield input as list
for smile, ff_name in force_field.items():
openff_mol = openff.toolkit.topology.Molecule.from_smiles(smile)
# Assign mols and forcefield as small molecule vs AMBER or
# CHARMM
if ff_name.lower() in small_ffs:
small_molecules[openff_mol] = ff_name.lower()
# Add charges to the molecule if provided
charged_mols = assign_charges_to_mols(
smile_strings=smile_strings,
partial_charges=partial_charges,
partial_charge_method=partial_charge_method,
partial_charge_scaling=partial_charge_scaling,
)
Comment on lines +498 to +503
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we pull this out front and call it once for small molecules and biopolymer_or_water?

for smile, charged_mol in zip(smile_strings, charged_mols):
if smile in force_field.keys():
ff_name = force_field[smile]
if ff_name.lower() in all_small_ffs or ff_name.lower() in basic_small_ffs:
small_molecules[charged_mol] = ff_name
else:
large_or_water[charged_mol] = ff_name
else:
large_or_water[openff_mol] = ff_name.lower()
forcefield_omm = omm_ForceField()
small_molecules[charged_mol] = "sage"

# Determine category of forcefield for deciding which water model
large_ff_category = None
for ff in large_or_water.values():
temp_ff_string = None
if "amber" in ff.lower():
temp_ff_string = "amber"
elif "charmm" in ff.lower():
temp_ff_string = "charmm"
elif "amoeba" in ff.lower():
temp_ff_string = "amoeba"
else:
continue
orionarcher marked this conversation as resolved.
Show resolved Hide resolved
if large_ff_category is None:
large_ff_category = temp_ff_string
else:
if large_ff_category != temp_ff_string:
warnings.warn(f"Did you mean to mix {temp_ff_string} and " f"{large_ff_category} force fields?")
orionarcher marked this conversation as resolved.
Show resolved Hide resolved

ff_to_load = None
water_assignment = {
"amber": {"spce": "amber14/spce.xml", "tip3p": "amber14/tip3p.xml", "tip4p": "amber14/tip4pew.xml"},
"charmm": {"spce": "charmm36/spce.xml", "tip3p": "charmm36/water.xml", "tip4p": "charmm36/tip4pew.xml"},
orionarcher marked this conversation as resolved.
Show resolved Hide resolved
}
for ff in large_or_water.values():
forcefield_omm.loadFile(ff)
for mol, ff in small_molecules.items():
if "gaff" in ff:
gaff = GAFFTemplateGenerator(molecules=mol, forcefield=ff)
forcefield_omm.registerTemplateGenerator(gaff.generator)
elif "smirnoff" in ff or "openff" in ff:
sage = SMIRNOFFTemplateGenerator(molecules=mol, forcefield=ff)
forcefield_omm.registerTemplateGenerator(sage.generator())
box_size = min(box[3] - box[0], box[4] - box[1], box[5] - box[2])
nonbondedCutoff = min(10, box_size // 2)
periodic_box_vectors = np.multiply(
np.array(
[
[box[3] - box[0], 0, 0],
[0, box[4] - box[1], 0],
[0, 0, box[5] - box[2]],
]
),
0.1,
) # needs to be nanometers, assumes box in angstroms
topology.setPeriodicBoxVectors(vectors=periodic_box_vectors)
system = forcefield_omm.createSystem(topology=topology, nonbondedMethod=PME, nonbondedCutoff=nonbondedCutoff)
return system
raise NotImplementedError(
f"currently only these force fields are supported: {' '.join(supported_force_fields)}.\n"
f"Please select one of the supported force fields."
if ff in basic_water_ffs:
# Ensure the water model matches the large molecule model
if large_ff_category:
if ff in water_assignment[large_ff_category].keys():
ff_to_load = water_assignment[large_ff_category][ff]
else:
warnings.warn(f"Did you mean to use {ff} with the " f"{large_ff_category} force field?")
# If there isn't a large molecule forcefield required,
# assume amber14
else:
ff_to_load = water_assignment["amber"][ff]
# TODO: Add lookup to ensure the ff is allowable
else:
ff_to_load == ff
forcefield_omm.loadFile(ff_to_load)
# Add small molecules to forcefield
for mol, ff_name in small_molecules.items():
template = assign_small_molecule_ff(molecules=[mol], forcefield_name=ff_name)
forcefield_omm.registerTemplateGenerator(template.generator)
orionarcher marked this conversation as resolved.
Show resolved Hide resolved

box_size = min(box[3] - box[0], box[4] - box[1], box[5] - box[2])
nonbondedCutoff = min(10, box_size // 2)
# TODO: Make insensitive to input units
periodic_box_vectors = np.array(
[
[box[3] - box[0], 0, 0],
[0, box[4] - box[1], 0],
[0, 0, box[5] - box[2]],
]
)
topology.setPeriodicBoxVectors(vectors=periodic_box_vectors)
system = forcefield_omm.createSystem(topology=topology, nonbondedMethod=PME, nonbondedCutoff=nonbondedCutoff)
return system