Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Interaction and Better Testing #71

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
31beb71
refactor interaction and initial testing
Mar 26, 2024
dccf676
minor changes
Mar 26, 2024
2ab64aa
dummy modification
Mar 26, 2024
189ab90
undo changes in interaction dataset, and minor change in shape
Mar 29, 2024
282dc91
changed super class to BaseInteractionDataset
Apr 2, 2024
701ef1e
Merge branch 'release' into testing
Apr 3, 2024
afea053
further simplified and rebase
Apr 3, 2024
ebc2adf
fixes
Apr 5, 2024
7ffd0b1
Merge remote-tracking branch 'origin/release' into testing
Apr 5, 2024
d15e9cf
Merge remote-tracking branch 'origin/release' into testing
Apr 5, 2024
ed8e264
Updated metcalf
Apr 5, 2024
18bc79c
bug fix and simplifying interaction dataset
Apr 6, 2024
2a6e3ef
Updated tests for interaction datasets
Apr 6, 2024
7493273
removed stale stats in dummy interaction
Apr 6, 2024
ed73e7d
changes based on comments
Apr 6, 2024
0359022
Clean metcalf
FNTwin Apr 6, 2024
33fa342
Simplification
FNTwin Apr 6, 2024
cd486a8
cleaned des
FNTwin Apr 6, 2024
80d7371
Simplified des dataset
FNTwin Apr 6, 2024
f3d205c
removed redundant dataset files
FNTwin Apr 6, 2024
da4fece
DES inheritance
FNTwin Apr 6, 2024
71ff741
Removed des and improved des naming
FNTwin Apr 6, 2024
f6e12e1
DES fixes
FNTwin Apr 6, 2024
3328a65
Removed comments
FNTwin Apr 6, 2024
8b28d59
X40 and L70
FNTwin Apr 6, 2024
8595fd8
Safe opening
FNTwin Apr 6, 2024
ca1b4af
Moved X40 in L7 and removed x40.py
FNTwin Apr 6, 2024
4bec82d
Moved Yaml utils to _utils.py, L7 + X40 interface
FNTwin Apr 7, 2024
a5ced0a
Merge testing + Add imports
FNTwin Apr 8, 2024
a21963e
Merge pull request #79 from OpenDrugDiscovery/interaction_impr
shenoynikhil Apr 8, 2024
3303f95
better convert function and n_body_first to ptr
Apr 12, 2024
6f033cf
Updated splinter reading from -1 to nan
Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion openqdc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ def get_project_root():
_lazy_imports_obj = {
"__version__": "openqdc._version",
"BaseDataset": "openqdc.datasets.base",
# POTENTIAL
"ANI1": "openqdc.datasets.potential.ani",
"ANI1CCX": "openqdc.datasets.potential.ani",
"ANI1X": "openqdc.datasets.potential.ani",
Expand All @@ -32,12 +33,23 @@ def get_project_root():
"SolvatedPeptides": "openqdc.datasets.potential.solvated_peptides",
"WaterClusters": "openqdc.datasets.potential.waterclusters3_30",
"TMQM": "openqdc.datasets.potential.tmqm",
"Dummy": "openqdc.datasets.potential.dummy",
"PCQM_B3LYP": "openqdc.datasets.potential.pcqm",
"PCQM_PM6": "openqdc.datasets.potential.pcqm",
"RevMD17": "openqdc.datasets.potential.revmd17",
"Transition1X": "openqdc.datasets.potential.transition1x",
"MultixcQM9": "openqdc.datasets.potential.multixcqm9",
# INTERACTION
"DES5M": "openqdc.datasets.interaction.des",
"DES370K": "openqdc.datasets.interaction.des",
"DESS66": "openqdc.datasets.interaction.des",
"DESS66x8": "openqdc.datasets.interaction.des",
"L7": "openqdc.datasets.interaction.l7",
"X40": "openqdc.datasets.interaction.x40",
"Metcalf": "openqdc.datasets.interaction.metcalf",
"Splinter": "openqdc.datasets.interaction.splinter",
# DEBUG
"Dummy": "openqdc.datasets.potential.dummy",
# ALL
"AVAILABLE_DATASETS": "openqdc.datasets",
"AVAILABLE_POTENTIAL_DATASETS": "openqdc.datasets.potential",
"AVAILABLE_INTERACTION_DATASETS": "openqdc.datasets.interaction",
Expand Down Expand Up @@ -75,6 +87,13 @@ def __dir__():
from ._version import __version__ # noqa
from .datasets import AVAILABLE_DATASETS # noqa
from .datasets.base import BaseDataset # noqa

# INTERACTION
from .datasets.interaction.des import DES5M, DES370K, DESS66, DESS66x8 # noqa
from .datasets.interaction.l7 import L7 # noqa
from .datasets.interaction.metcalf import Metcalf # noqa
from .datasets.interaction.splinter import Splinter # noqa
from .datasets.interaction.x40 import X40 # noqa
from .datasets.potential.ani import ANI1, ANI1CCX, ANI1X # noqa
from .datasets.potential.comp6 import COMP6 # noqa
from .datasets.potential.dummy import Dummy # noqa
Expand Down
32 changes: 27 additions & 5 deletions openqdc/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@

@requires_package("torch")
def to_torch(x: np.ndarray):
    """Convert ``x`` to a ``torch.Tensor``.

    An input that is already a ``torch.Tensor`` is handed back unchanged;
    anything else is passed to ``torch.from_numpy``.
    """
    already_tensor = isinstance(x, torch.Tensor)
    return x if already_tensor else torch.from_numpy(x)


@requires_package("jax")
def to_jax(x: np.ndarray):
    """Convert ``x`` to a JAX array.

    An input that is already a ``jnp.ndarray`` is handed back unchanged;
    anything else is passed to ``jnp.array``.
    """
    already_jax = isinstance(x, jnp.ndarray)
    return x if already_jax else jnp.array(x)


Expand Down Expand Up @@ -166,6 +170,7 @@ def _precompute_statistics(self, overwrite_local_cache: bool = False):
PerAtomFormationEnergyStats,
)
self.statistics.run_calculators() # run the calculators
self._compute_average_nb_atoms()

@classmethod
def no_init(cls):
Expand Down Expand Up @@ -243,6 +248,14 @@ def data_keys(self):
keys.remove("forces")
return keys

@property
def pkl_data_keys(self):
    """Return the key names declared by ``pkl_data_types`` as a list.

    The list follows the iteration order of the ``pkl_data_types`` mapping.
    """
    # Iterating a mapping yields its keys, so this matches ``.keys()``.
    return list(self.pkl_data_types)

@property
def pkl_data_types(self):
    """Return a mapping from each pkl entry name to its associated type.

    Entries: ``"name"`` and ``"subset"`` map to ``str``; ``"n_atoms"`` maps
    to ``np.int32``.
    """
    type_by_key = {
        "name": str,
        "subset": str,
        "n_atoms": np.int32,
    }
    return type_by_key

@property
def data_types(self):
return {
Expand All @@ -257,8 +270,8 @@ def data_shapes(self):
return {
"atomic_inputs": (-1, NB_ATOMIC_FEATURES),
"position_idx_range": (-1, 2),
"energies": (-1, len(self.energy_target_names)),
"forces": (-1, 3, len(self.force_target_names)),
"energies": (-1, len(self.energy_methods)),
"forces": (-1, 3, len(self.force_methods)),
}

def _set_units(self, en, ds):
Expand Down Expand Up @@ -332,8 +345,14 @@ def save_preprocess(self, data_dict):

# save smiles and subset
local_path = p_join(self.preprocess_path, "props.pkl")
for key in ["name", "subset"]:
data_dict[key] = np.unique(data_dict[key], return_inverse=True)

# assert that (required) pkl keys are present in data_dict
assert all([key in data_dict.keys() for key in self.pkl_data_keys])

# store unique and inverse indices for str-based pkl keys
for key in self.pkl_data_keys:
shenoynikhil marked this conversation as resolved.
Show resolved Hide resolved
if self.pkl_data_types[key] == str:
data_dict[key] = np.unique(data_dict[key], return_inverse=True)

with open(local_path, "wb") as f:
pkl.dump(data_dict, f)
Expand Down Expand Up @@ -369,7 +388,10 @@ def read_preprocess(self, overwrite_local_cache=False):
pull_locally(filename, overwrite=overwrite_local_cache)
with open(filename, "rb") as f:
tmp = pkl.load(f)
for key in ["name", "subset", "n_atoms"]:
all_pkl_keys = set(tmp.keys()) - set(self.data_keys)
# assert required pkl_keys are present in all_pkl_keys
assert all([key in all_pkl_keys for key in self.pkl_data_keys])
shenoynikhil marked this conversation as resolved.
Show resolved Hide resolved
for key in all_pkl_keys:
x = tmp.pop(key)
if len(x) == 2:
self.data[key] = x[0][x[1]]
Expand Down
123 changes: 0 additions & 123 deletions openqdc/datasets/interaction/L7.py

This file was deleted.

84 changes: 0 additions & 84 deletions openqdc/datasets/interaction/X40.py

This file was deleted.

9 changes: 3 additions & 6 deletions openqdc/datasets/interaction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
from .base import BaseInteractionDataset # noqa
from .des5m import DES5M
from .des370k import DES370K
from .dess66 import DESS66
from .dess66x8 import DESS66x8
from .L7 import L7
from .des import DES5M, DES370K, DESS66, DESS66x8
from .l7 import L7
from .metcalf import Metcalf
from .splinter import Splinter
from .X40 import X40
from .x40 import X40

AVAILABLE_INTERACTION_DATASETS = {
"des5m": DES5M,
Expand Down
Loading
Loading