Skip to content

Commit 46c41e5

Browse files
committed
Dropped useless datasets, NNs and utils.
1 parent 646fff2 commit 46c41e5

27 files changed

+95
-1217
lines changed

README.md

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ Installed models can be imported as `from chytorch.zoo.<model_name> import Model
2121
Usage
2222
-----
2323

24-
`chytorch.nn.MoleculeEncoder` and `chytorch.nn.ReactionEncoder` - core graphormer layers for molecules and reactions.
24+
`chytorch.nn.MoleculeEncoder` - core graphormer layer for molecule encoding.
2525
The API is a combination of `torch.nn.TransformerEncoderLayer` and `torch.nn.TransformerEncoder`.
2626

2727
**Batch preparation:**
2828

29-
`chytorch.utils.data.MoleculeDataset` and `chytorch.utils.data.ReactionDataset` - Map-like on-the-fly dataset generators for molecules and reactions.
30-
Supported `chython.MoleculeContainer` and `chython.ReactionContainer` objects, and bytes-packed structures.
29+
`chytorch.utils.data.MoleculeDataset` - Map-like on-the-fly dataset generator for molecules.
30+
Supports `chython.MoleculeContainer` objects and PaCh structures.
3131

32-
`chytorch.utils.data.collate_molecules` and `chytorch.utils.data.collate_reactions` - collate functions for `torch.utils.data.DataLoader`.
32+
`chytorch.utils.data.collate_molecules` - collate function for `torch.utils.data.DataLoader`.
3333

3434
Note: torch DataLoader automatically does proper collation since the 1.13 release.
3535

@@ -52,10 +52,6 @@ Molecules coded as tensors of:
5252
* topological distances' matrix shifted by 2 with upper limit.
5353
0 - reserved for padding, 1 - reserved for not-connected graph components coding, 2 - self-loop, 3 - connected atoms.
5454

55-
Reactions coded in similar way. Molecules atoms and neighbors matrices just stacked. Distance matrices stacked on diagonal.
56-
Reactions include additional tensor with reaction role codes for each token.
57-
0 - padding, 1 - reaction CLS, 2 - reactants, 3 - products.
58-
5955
from chytorch.nn import MoleculeEncoder
6056

6157
encoder = MoleculeEncoder()
@@ -73,11 +69,6 @@ Reactions include additional tensor with reaction role codes for each token.
7369
dl = DataLoader(TensorDataset(MoleculeDataset(molecules_list), properties_tensor),
7470
collate_fn=chained_collate(collate_molecules, stack))
7571

76-
77-
**Scheduler:**
78-
79-
`chytorch.optim.lr_scheduler.WarmUpCosine` - Linear warmup followed with cosine-function for 0-pi range rescaled to lr_rate - decrease_coef * lr_rate interval.
80-
8172
**Voting NN with single hidden layer:**
8273

8374
`chytorch.nn.VotingClassifier`, `chytorch.nn.BinaryVotingClassifier` and `chytorch.nn.VotingRegressor` - speed optimized multiple heads for ensemble predictions.
@@ -94,8 +85,6 @@ All wrappers have `torch.utils.data.Dataset` interface.
9485
* `SizedList` - list wrapper with `size()` method. Useful with `torch.utils.data.TensorDataset`.
9586
* `SMILESDataset` - on-the-fly smiles to `chython.MoleculeContainer` or `chython.ReactionContainer` parser.
9687
* `LMDBMapper` - LMDB KV storage to dataset mapper.
97-
* `PostgresMapper` - Postgres DB table to dataset mapper.
98-
* `SMILESTokenizerDataset` - on-the-fly generator of tokenized SMILES.
9988
* `TensorUnpack`, `StructUnpack`, `PickleUnpack` - bytes to tensor/object unpackers
10089

10190

chytorch/nn/__init__.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
#
3-
# Copyright 2021-2023 Ramil Nugmanov <nougmanoff@protonmail.com>
3+
# Copyright 2021-2024 Ramil Nugmanov <nougmanoff@protonmail.com>
44
#
55
# Permission is hereby granted, free of charge, to any person obtaining a copy
66
# of this software and associated documentation files (the “Software”), to deal
@@ -20,8 +20,6 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222
#
23-
from .activation import *
24-
from .converters import *
2523
from .losses import *
2624
from .molecule import *
2725
from .reaction import *
@@ -33,9 +31,7 @@
3331
'ReactionEncoder',
3432
'Slicer',
3533
'VotingClassifier', 'VotingRegressor', 'BinaryVotingClassifier',
36-
'PulingHardtanh',
3734
'MultiTaskLoss',
3835
'CensoredLoss',
3936
'MaskedNaNLoss',
40-
'MSLELoss',
41-
'Converters', 'MultiColumnConverters']
37+
'MSLELoss']

chytorch/nn/converters.py

Lines changed: 0 additions & 97 deletions
This file was deleted.

chytorch/nn/functional/__init__.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

chytorch/nn/functional/puling_hardtanh.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

chytorch/nn/molecule/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222
#
23-
from .embedding import *
2423
from .encoder import *
2524

2625

27-
__all__ = ['MoleculeEncoder',
28-
'EmbeddingBag']
26+
__all__ = ['MoleculeEncoder']
File renamed without changes.

chytorch/nn/molecule/encoder.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222
#
23+
from itertools import repeat
2324
from torch.nn import GELU, Module, ModuleList, LayerNorm
2425
from torchtyping import TensorType
26+
from typing import Tuple, Optional, List
2527
from warnings import warn
26-
from .embedding import EmbeddingBag
28+
from ._embedding import EmbeddingBag
2729
from ..lora import Embedding
2830
from ..transformer import EncoderLayer
2931
from ...utils.data import MoleculeDataBatch
@@ -49,6 +51,7 @@ def __init__(self, max_neighbors: int = 14, max_distance: int = 10, d_model: int
4951
shared_attention_bias: bool = True, dropout: float = 0.1, activation=GELU,
5052
layer_norm_eps: float = 1e-5, norm_first: bool = False, post_norm: bool = False,
5153
zero_bias: bool = False, perturbation: float = 0., max_tokens: int = 121,
54+
projection_bias: bool = True, ff_bias: bool = True,
5255
lora_r: int = 0, lora_alpha: float = 1., lora_dropout: float = 0.):
5356
"""
5457
Molecule Graphormer from https://doi.org/10.1021/acs.jcim.2c00344.
@@ -103,16 +106,21 @@ def __init__(self, max_neighbors: int = 14, max_distance: int = 10, d_model: int
103106
self.shared_weights = shared_weights
104107
if shared_weights:
105108
self.layer = EncoderLayer(d_model, nhead, dim_feedforward, dropout, activation, layer_norm_eps, norm_first,
109+
projection_bias=projection_bias, ff_bias=ff_bias,
106110
lora_r=lora_r, lora_alpha=lora_alpha, lora_dropout=lora_dropout)
107111
self.layers = [self.layer] * num_layers
108112
else:
109113
# layers sharing scheme can be manually changed. e.g. pairs of shared encoders
110114
self.layers = ModuleList(EncoderLayer(d_model, nhead, dim_feedforward, dropout, activation,
111115
layer_norm_eps, norm_first, lora_r=lora_r, lora_alpha=lora_alpha,
116+
projection_bias=projection_bias, ff_bias=ff_bias,
112117
lora_dropout=lora_dropout) for _ in range(num_layers))
113118
self._register_load_state_dict_pre_hook(_update)
114119

115-
def forward(self, batch: MoleculeDataBatch) -> TensorType['batch', 'atoms', 'embedding']:
120+
def forward(self, batch: MoleculeDataBatch, /, *,
121+
cache: Optional[List[Tuple[TensorType['batch', 'atoms+conditions', 'embedding'],
122+
TensorType['batch', 'atoms+conditions', 'embedding']]]] = None) -> \
123+
TensorType['batch', 'atoms', 'embedding']:
116124
"""
117125
Use 0 for padding.
118126
Atoms should be coded by atomic numbers + 2.
@@ -122,15 +130,16 @@ def forward(self, batch: MoleculeDataBatch) -> TensorType['batch', 'atoms', 'emb
122130
Distances should be coded from 2 (means self-loop) to max_distance + 2.
123131
Non-reachable atoms should be coded by 1.
124132
"""
133+
cache = repeat(None) if cache is None else iter(cache)
125134
atoms, neighbors, distances = batch
126135

127136
x = self.embedding(atoms, neighbors)
128137

129-
for lr, d in zip(self.layers, self.distance_encoders):
138+
for lr, d, c in zip(self.layers, self.distance_encoders, cache):
130139
if d is not None:
131140
d_mask = d(distances).permute(0, 3, 1, 2) # BxNxNxH > BxHxNxN
132141
# else: reuse previously calculated mask
133-
x, _ = lr(x, d_mask) # noqa
142+
x, _ = lr(x, d_mask, cache=c) # noqa
134143

135144
if self.post_norm:
136145
return self.norm(x)

0 commit comments

Comments
 (0)