Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize QC task models to allow pre-optimization #177

Open
wants to merge 2 commits into
base: split_spec
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 43 additions & 19 deletions openff/bespokefit/executor/services/qcgenerator/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@

from openff.bespokefit.executor.services.qcgenerator import worker
from openff.bespokefit.schema.tasks import (
BaseTaskSpec,
HessianTask,
OptimizationTask,
QCGenerationTask,
Torsion1DTask,
)
from openff.bespokefit.utilities.molecule import canonical_order_atoms
Expand Down Expand Up @@ -48,18 +48,21 @@ def _canonicalize_task(task: _T) -> _T:

task.central_bond = (1, 2)

else:
elif isinstance(task, (HessianTask, OptimizationTask)):

canonical_smiles = canonical_molecule.to_smiles(
isomeric=True, explicit_hydrogens=True, mapped=False
)

else:
raise NotImplementedError()

task.smiles = canonical_smiles

return task


def _hash_task(task: QCGenerationTask) -> str:
def _hash_task(task: BaseTaskSpec) -> str:
"""Returns the SHA-512 hex digest of the JSON serialization of a QC task"""
# ``hashlib`` only accepts bytes, so the JSON text is encoded (UTF-8 by default) first.
return hashlib.sha512(task.json().encode()).hexdigest()

Expand All @@ -85,6 +88,24 @@ def _cache_task_id(
redis_connection.hset("qcgenerator:task-ids", task_hash, task_id)


def _compute_hessian_task() -> str:
    """Placeholder for submitting a hessian calculation to the QC worker.

    The ``-> str`` annotation mirrors the other ``_compute_*_task`` helpers,
    which return the id of the task they submit.

    Raises:
        NotImplementedError: Always; hessian tasks cannot be computed yet.
    """
    raise NotImplementedError("hessian tasks are not yet supported")


def _compute_optimization_task(task: OptimizationTask) -> str:
    """Submit a geometry optimization to the QC generator worker.

    Only ``task.smiles``, ``task.optimization_spec`` (serialized to JSON) and
    ``task.n_conformers`` are forwarded to ``worker.compute_optimization``.

    Args:
        task: The optimization task to submit.

    Returns:
        The id of the task created by ``worker.compute_optimization.delay``.

    Raises:
        NotImplementedError: If ``task.pre_optimization_spec`` or
            ``task.evaluation_spec`` is set; neither is handled here yet.
    """
    # Fail before anything is queued rather than silently ignoring a spec the
    # caller asked for.
    if task.pre_optimization_spec is not None or task.evaluation_spec is not None:
        raise NotImplementedError(
            "optimization tasks with a pre-optimization or evaluation spec are "
            "not yet supported"
        )

    return worker.compute_optimization.delay(
        smiles=task.smiles,
        optimization_spec_json=task.optimization_spec.json(),
        n_conformers=task.n_conformers,
    ).id


def _compute_torsion_drive_task(
task: Torsion1DTask, redis_connection: redis.Redis
) -> str:
Expand All @@ -94,7 +115,7 @@ def _compute_torsion_drive_task(
task_id = None

torsion_drive_task = task.copy(deep=True)
torsion_drive_task.sp_specification = None
torsion_drive_task.evaluation_spec = None

torsion_drive_hash = _hash_task(torsion_drive_task)
torsion_drive_id = _retrieve_cached_task_id(torsion_drive_hash, redis_connection)
Expand All @@ -103,19 +124,23 @@ def _compute_torsion_drive_task(

# There are no cached torsion drives at the 'pre-optimise' level of theory
# we need to run a torsion drive and then optionally a single point
if task.sp_specification is None:

torsion_drive_id = worker.compute_torsion_drive.delay(
task_json=task.json()
).id
compute_torsion_drive_func = worker.compute_torsion_drive.s(
smiles=task.smiles,
central_bond=task.central_bond,
grid_spacing=task.grid_spacing,
scan_range=task.scan_range,
optimization_spec_json=task.optimization_spec.json(),
n_conformers=task.n_conformers,
)

if task.evaluation_spec is None:
torsion_drive_id = compute_torsion_drive_func.delay().id
else:

task_future: AsyncResult = (
worker.compute_torsion_drive.s(task_json=task.json())
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
compute_torsion_drive_func
| worker.re_evaluate_torsion_drive.s(
evaluation_spec_json=task.evaluation_spec.json(),
)
).delay()

Expand All @@ -126,7 +151,7 @@ def _compute_torsion_drive_task(
torsion_drive_id, task.type, torsion_drive_hash, redis_connection
)

if task.sp_specification is None:
if task.evaluation_spec is None:
return torsion_drive_id

if task_id is None:
Expand All @@ -136,9 +161,8 @@ def _compute_torsion_drive_task(
task_id = (
(
worker.wait_for_task.s(torsion_drive_id)
| worker.evaluate_torsion_drive.s(
model_json=task.sp_specification.model.json(),
program=task.sp_specification.program,
| worker.re_evaluate_torsion_drive.s(
evaluation_spec_json=task.evaluation_spec.json(),
)
)
.delay()
Expand Down Expand Up @@ -168,9 +192,9 @@ def cached_compute_task(
if isinstance(task, Torsion1DTask):
task_id = _compute_torsion_drive_task(task, redis_connection)
elif isinstance(task, OptimizationTask):
task_id = worker.compute_optimization.delay(task_json=task.json()).id
task_id = _compute_optimization_task(task)
elif isinstance(task, HessianTask):
task_id = worker.compute_hessian.delay(task_json=task.json()).id
task_id = _compute_hessian_task()
else:
raise NotImplementedError()

Expand Down
94 changes: 50 additions & 44 deletions openff/bespokefit/executor/services/qcgenerator/worker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional, Tuple

import psutil
import qcelemental
Expand All @@ -9,7 +9,7 @@
from celery.utils.log import get_task_logger
from openff.toolkit.topology import Atom, Molecule
from qcelemental.models import AtomicInput, AtomicResult
from qcelemental.models.common_models import DriverEnum, Model, Provenance
from qcelemental.models.common_models import DriverEnum, Provenance
from qcelemental.models.procedures import (
OptimizationInput,
OptimizationResult,
Expand All @@ -25,7 +25,7 @@
from openff.bespokefit.executor.services import current_settings
from openff.bespokefit.executor.utilities.celery import configure_celery_app
from openff.bespokefit.executor.utilities.redis import connect_to_default_redis
from openff.bespokefit.schema.tasks import OptimizationTask, Torsion1DTask
from openff.bespokefit.schema.tasks import OptimizationSpec, SinglePointSpec

celery_app = configure_celery_app(
"qcgenerator", connect_to_default_redis(validate=False)
Expand Down Expand Up @@ -66,23 +66,30 @@ def _select_atom(atoms: List[Atom]) -> int:


@celery_app.task(acks_late=True)
def compute_torsion_drive(task_json: str) -> str:
def compute_torsion_drive(
smiles: str,
central_bond: Tuple[int, int],
grid_spacing: int,
scan_range: Optional[Tuple[int, int]],
optimization_spec_json: str,
n_conformers: int,
) -> str:
"""Runs a torsion drive using QCEngine."""

task = Torsion1DTask.parse_raw(task_json)

_task_logger.info(f"running 1D scan with {_task_config()}")

molecule: Molecule = Molecule.from_smiles(task.smiles)
molecule.generate_conformers(n_conformers=task.n_conformers)
optimization_spec = OptimizationSpec.parse_raw(optimization_spec_json)

molecule: Molecule = Molecule.from_smiles(smiles)
molecule.generate_conformers(n_conformers=n_conformers)

map_to_atom_index = {
map_index: atom_index
for atom_index, map_index in molecule.properties["atom_map"].items()
}

index_2 = map_to_atom_index[task.central_bond[0]]
index_3 = map_to_atom_index[task.central_bond[1]]
index_2 = map_to_atom_index[central_bond[0]]
index_3 = map_to_atom_index[central_bond[1]]

index_1_atoms = [
atom
Expand All @@ -107,8 +114,8 @@ def compute_torsion_drive(task_json: str) -> str:
_select_atom(index_4_atoms),
)
],
grid_spacing=[task.grid_spacing],
dihedral_ranges=[task.scan_range] if task.scan_range is not None else None,
grid_spacing=[grid_spacing],
dihedral_ranges=[scan_range] if scan_range is not None else None,
),
extras={
"canonical_isomeric_explicit_hydrogen_mapped_smiles": molecule.to_smiles(
Expand All @@ -119,14 +126,14 @@ def compute_torsion_drive(task_json: str) -> str:
molecule.to_qcschema(conformer=i) for i in range(molecule.n_conformers)
],
input_specification=QCInputSpecification(
model=task.model,
model=optimization_spec.model,
driver=DriverEnum.gradient,
),
optimization_spec=OptimizationSpecification(
procedure=task.optimization_spec.program,
procedure=optimization_spec.procedure.program,
keywords={
**task.optimization_spec.dict(exclude={"program", "constraints"}),
"program": task.program,
**optimization_spec.procedure.dict(exclude={"program", "constraints"}),
"program": optimization_spec.program,
},
),
)
Expand All @@ -146,21 +153,18 @@ def compute_torsion_drive(task_json: str) -> str:


@celery_app.task(acks_late=True)
def evaluate_torsion_drive(
def re_evaluate_torsion_drive(
result_json: str,
model_json: str,
program: str,
evaluation_spec_json: str,
) -> str:
"""
Re-evaluates the energies at each optimised geometry along a torsion drive
at a new level of theory.
"""

model = Model.parse_raw(model_json)
evaluation_spec = SinglePointSpec.parse_raw(evaluation_spec_json)

_task_logger.info(
f"performing single point evaluations using {model} and {program}"
)
_task_logger.info(f"performing single point evaluations using {evaluation_spec}")

original_result = TorsionDriveResult.parse_raw(result_json)

Expand All @@ -170,8 +174,7 @@ def evaluate_torsion_drive(
energies = {
grid_point: _compute_single_point(
molecule=molecule,
model=model,
program=program,
spec=evaluation_spec,
config=qcengine_config,
).return_result
for grid_point, molecule in original_result.final_molecules.items()
Expand All @@ -181,7 +184,7 @@ def evaluate_torsion_drive(
keywords=original_result.keywords,
extras=original_result.extras,
input_specification=QCInputSpecification(
driver=DriverEnum.gradient, model=model
driver=DriverEnum.gradient, model=evaluation_spec.model
),
initial_molecule=original_result.initial_molecule,
optimization_spec=original_result.optimization_spec,
Expand All @@ -199,32 +202,34 @@ def evaluate_torsion_drive(

@celery_app.task(acks_late=True)
def compute_optimization(
task_json: str,
smiles: str,
optimization_spec_json: str,
n_conformers: int,
) -> List[OptimizationResult]:
"""Runs a set of geometry optimizations using QCEngine."""
# TODO: should we only return the lowest energy optimization?
# or the first optimisation to work?

task = OptimizationTask.parse_raw(task_json)
# or the first optimisation to work?

_task_logger.info(f"running opt with {_task_config()}")

molecule: Molecule = Molecule.from_smiles(task.smiles)
molecule.generate_conformers(n_conformers=task.n_conformers)
optimization_spec = OptimizationSpec.parse_raw(optimization_spec_json)

molecule: Molecule = Molecule.from_smiles(smiles)
molecule.generate_conformers(n_conformers=n_conformers)

input_schemas = [
OptimizationInput(
keywords={
**task.optimization_spec.dict(exclude={"program", "constraints"}),
"program": task.program,
**optimization_spec.procedure.dict(exclude={"program", "constraints"}),
"program": optimization_spec.program,
},
extras={
"canonical_isomeric_explicit_hydrogen_mapped_smiles": molecule.to_smiles(
isomeric=True, explicit_hydrogens=True, mapped=True
)
},
input_specification=QCInputSpecification(
model=task.model,
model=optimization_spec.model,
driver=DriverEnum.gradient,
),
initial_molecule=molecule.to_qcschema(conformer=i),
Expand All @@ -238,7 +243,7 @@ def compute_optimization(

return_value = qcengine.compute_procedure(
input_schema,
task.optimization_spec.program,
optimization_spec.procedure.program,
raise_error=True,
local_options=_task_config(),
)
Expand All @@ -257,26 +262,27 @@ def compute_optimization(
return serialize(return_values, "json")


@celery_app.task(acks_late=True)
def compute_hessian(task_json: str) -> AtomicResult:
"""Runs a set of hessian evaluations using QCEngine."""
raise NotImplementedError()
# @celery_app.task(acks_late=True)
# def compute_hessian() -> AtomicResult:
# """Runs a set of hessian evaluations using QCEngine."""
# raise NotImplementedError()


def _compute_single_point(
molecule: qcelemental.models.Molecule,
model: Model,
program: str,
spec: SinglePointSpec,
config: Dict[str, Any],
) -> AtomicResult:
"""
Perform a single point calculation on the input ``qcelemental`` molecule.
"""

qc_input = AtomicInput(molecule=molecule, driver=DriverEnum.energy, model=model)
qc_input = AtomicInput(
molecule=molecule, driver=DriverEnum.energy, model=spec.model
)
return qcengine.compute(
input_data=qc_input,
program=program,
program=spec.program,
raise_error=True,
local_options=config,
)
Expand Down
Loading