diff --git a/devtools/conda-envs/test_env.yaml b/devtools/conda-envs/test_env.yaml index 02eeef0a..a53e0376 100644 --- a/devtools/conda-envs/test_env.yaml +++ b/devtools/conda-envs/test_env.yaml @@ -22,4 +22,4 @@ dependencies: - pytest - pytest-pep8 - pytest-cov -- codecov \ No newline at end of file +- codecov diff --git a/doc/sphinx/source/workflows.txt b/doc/sphinx/source/workflows.txt index 8d2e42ec..c6ec8918 100644 --- a/doc/sphinx/source/workflows.txt +++ b/doc/sphinx/source/workflows.txt @@ -17,3 +17,4 @@ for use with :class:`~mdpow.analysis.dihedral.DihedralAnalysis`. workflows/base workflows/registry workflows/dihedrals + workflows/solvations diff --git a/doc/sphinx/source/workflows/solvations.txt b/doc/sphinx/source/workflows/solvations.txt new file mode 100644 index 00000000..44c068e7 --- /dev/null +++ b/doc/sphinx/source/workflows/solvations.txt @@ -0,0 +1,7 @@ +================================== +Automated Solvation Shell Analysis +================================== + +.. versionadded:: 0.9.0 + +.. automodule:: mdpow.workflows.solvations \ No newline at end of file diff --git a/mdpow/analysis/solvation.py b/mdpow/analysis/solvation.py index 6ed6137a..52dc804d 100644 --- a/mdpow/analysis/solvation.py +++ b/mdpow/analysis/solvation.py @@ -56,21 +56,38 @@ def __init__(self, solute: EnsembleAtomGroup, solvent: EnsembleAtomGroup, distan self._dists = distances def _prepare_ensemble(self): - self._col = ['distance', 'solvent', 'interaction', - 'lambda', 'time', 'N_solvent'] + self._col = ['solute_ix', 'solvent_ix', 'distance', + 'solvent', 'interaction', + 'lambda', 'time'] self.results = pd.DataFrame(columns=self._col) self._res_dict = {key: [] for key in self._col} def _single_frame(self): solute = self._solute[self._key] solvent = self._solvent[self._key] - pairs, distances = capped_distance(solute.positions, solvent.positions, - max(self._dists), box=self._ts.dimensions) - solute_i, solvent_j = np.transpose(pairs) - for d in self._dists: - close_solv_atoms = solvent[solvent_j[distances < d]] - result = [d, self._key[0], self._key[1],self._key[2], - self._ts.time, close_solv_atoms.n_atoms] + #pairs, distances = capped_distance(solute.positions, solvent.positions, + pairs, distances = capped_distance(solute, solvent, + self._dists[0], box=self._ts.dimensions, + return_distances=True) + #solute_i, solvent_j = np.transpose(pairs) + #for d in self._dists: + # close_solv_atoms = solvent[solvent_j[distances < d]] + # result = [d, self._key[0], self._key[1],self._key[2], + # self._ts.time, close_solv_atoms.n_atoms] + # for i in range(len(self._col)): + # self._res_dict[self._col[i]].append(result[i]) + + for k, [i, j] in enumerate(pairs): + #su = solute.positions[i] + su = solute[i] + su_info = [su.ix, su.name, su.type, su.resname, su.resid, su.segid] + #sv = solvent.positions[j] + sv = solvent[j] + sv_info = [sv.ix, sv.name, sv.type, sv.resname, sv.resid, sv.segid] + d = distances[k] + result = [su_info, sv_info, d, + self._key[0], self._key[1], + self._key[2], self._ts.time] for i in range(len(self._col)): self._res_dict[self._col[i]].append(result[i]) diff --git a/mdpow/tests/test_automated_solvation_shell_analysis.py b/mdpow/tests/test_automated_solvation_shell_analysis.py new file mode 100644 index 00000000..2cd13b5e --- /dev/null +++ b/mdpow/tests/test_automated_solvation_shell_analysis.py @@ -0,0 +1,44 @@ +import os +import sys +import yaml +import pybol +import pytest +import pathlib +import logging + +import scipy +import numpy as np +import pandas as pd + +import rdkit +from rdkit import Chem + +import seaborn + +from numpy.testing import assert_almost_equal + +from . import RESOURCES + +import py.path + +from ..workflows import solvations + +from pkg_resources import resource_filename + +# ^review and update these as necessary, currently copied from test_ada + +RESOURCES = pathlib.PurePath(resource_filename(__name__, 'testing_resources')) +MANIFEST = RESOURCES / "manifest.yml" + +@pytest.fixture(scope="function") +def molname_workflows_directory(tmp_path, molname='SM25'): + m = pybol.Manifest(str(MANIFEST)) + m.assemble('workflows', tmp_path) + return tmp_path / molname + +class TestAutomatedSolvationShellAnalysis(object): + + @pytest.fixture(scope="function") + def SM25_tmp_dir(self, molname_workflows_directory): + dirname = molname_workflows_directory + return dirname \ No newline at end of file diff --git a/mdpow/workflows/registry.py b/mdpow/workflows/registry.py index aee129d9..2ef2c28b 100644 --- a/mdpow/workflows/registry.py +++ b/mdpow/workflows/registry.py @@ -12,11 +12,13 @@ :widths: auto :name: workflows_registry - +-------------------------------+------------------------------------------------------------------------------------------------------+ - | key/keyword: EnsembleAnalysis | value: . | - +===============================+======================================================================================================+ - | DihedralAnalysis | :any:`dihedrals.automated_dihedral_analysis ` | - +-------------------------------+------------------------------------------------------------------------------------------------------+ + +-------------------------------+----------------------------------------------------------------------------------------------------------------------+ + | key/keyword: EnsembleAnalysis | value: . | + +===============================+======================================================================================================================+ + | DihedralAnalysis | :any:`dihedrals.automated_dihedral_analysis ` | + +-------------------------------+----------------------------------------------------------------------------------------------------------------------+ + | SolvationAnalysis | :any:`solvations.automated_solvation_shell_analysis ` | + +-------------------------------+----------------------------------------------------------------------------------------------------------------------+ .. autodata:: registry @@ -26,10 +28,12 @@ # import analysis from mdpow.workflows import dihedrals +from mdpow.workflows import solvations registry = { - 'DihedralAnalysis' : dihedrals.automated_dihedral_analysis + 'DihedralAnalysis' : dihedrals.automated_dihedral_analysis, + 'SolvationAnalysis' : solvations.automated_solvation_shell_analysis } diff --git a/mdpow/workflows/solvations.py b/mdpow/workflows/solvations.py new file mode 100644 index 00000000..bebd775e --- /dev/null +++ b/mdpow/workflows/solvations.py @@ -0,0 +1,115 @@ +# MDPOW: solvations.py +# 2022 Cade Duckworth + +""" +:mod:`mdpow.workflows.solvations` --- Automation for :class:`SolvationAnalysis` +============================================================================== +:mod:`~mdpow.workflows.solvations` module with functions +useful for automated use of +:class:`~mdpow.analysis.solvation.SolvationAnalysis`. +See each function for usage, output, and examples. +Most functions can be used as standalone or in combination +depending on the desired results. Complete automation encompassed in +:func:`~mdpow.workflows.solvations.automated_solvation_shell_analysis`. + +.. autofunction:: solvation_ensemble +.. autofunction:: solvation_analysis +.. autofunction:: asa_save_df +.. autofunction:: automated_solvation_shell_analysis +""" + +import os +import numpy as np +import pandas as pd + +import mdpow +from mdpow.analysis.solvation import SolvationAnalysis + +import logging + +logger = logging.getLogger('mdpow.workflows.solvations') + +def solvation_ensemble(dirname, resname, solvents=('water', 'octanol'), + interactions=('Coulomb', 'VDW')): + + ens = mdpow.analysis.ensemble.Ensemble(dirname=dirname, + interactions=interactions, + solvents=solvents) + solute = ens.select_atoms(f'resname {resname}') + solvent = ens.select_atoms(f'not resname {resname}') + return solute, solvent + +def solvation_analysis(solute=None, solvent=None, distances=None, + start=None, stop=None, step=None): + + solv = SolvationAnalysis(solute=solute, solvent=solvent, distances=distances) + ds = solv.run(start=start, stop=stop, step=step) + df = solv.results + return df + +def asa_save_df(df, df_save_dir=None, resname=None, molname=None): + '''Takes a :class:`pandas.DataFrame` of results from + :class:`~mdpow.analysis.solvation.SolvationAnalysis` + as input to optionaly save the data. + Given a parent directory, creates subdirectory + for molecule, saves fully sampled csv. + :keywords: + *df* + results :class:`pandas.DataFrame` from + :class:`~mdpow.analysis.solvation.SolvationAnalysis` + *df_save_dir* + path to parent directory to create + subdirectory for saving the .csv files + ''' + + if molname is None: + molname = resname + + if df_save_dir is not None: + subdir = molname + newdir = os.path.join(df_save_dir, subdir) + os.mkdir(newdir) + + df = df.sort_values(by=["solvent", + "interaction", + "lambda"]).reset_index(drop=True) + + if df_save_dir is not None: + df.to_csv(f'{newdir}/{molname}_full_df.csv', index=False, compression='bz2') + # this part might need some work + return + +def automated_solvation_shell_analysis(dirname, df_save_dir=None, resname=None, molname=None, + solvents=('water', 'octanol'), interactions=('Coulomb', 'VDW'), + distances=[1.2, 2.4], figdir=None, + start=None, stop=None, step=None, + SMARTS=None, padding=None, width=None): + # figure out kwargs for each analysis type + """Measures the number of solvent molecules withing the given distances + in an :class:`~mdpow.analysis.ensemble.Ensemble` . + + :Parameters: + + *solute* + An :class:`~mdpow.analysis.ensemble.EnsembleAtom` containing the solute + used to measure distance. + + *solvent* + An :class:`~mdpow.analysis.ensemble.EnsembleAtom` containing the solvents + counted in by the distance measurement. Each solvent atom is counted by the + distance calculation. + + *distances* + Array like of the cutoff distances around the solute measured in Angstroms. + + The data is returned in a :class:`pandas.DataFrame` with observations sorted by + distance, solvent, interaction, lambda, time. + """ + + components = solvation_ensemble(dirname=dirname, resname=resname, solvents=solvents) + df = solvation_analysis(solute=components[0], solvent=components[1], + distances=distances, start=start, stop=stop, step=step) + if df_save_dir is not None: + asa_save_df(df, df_save_dir=df_save_dir, resname=resname, molname=molname) + + return df \ No newline at end of file