diff --git a/.gitignore b/.gitignore
index 145f67b..f0b3d02 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,8 @@
-01-benchmark_surfaces/
-01-benchmark_surfaces_npy/
-01-benchmark_pdbs_npy/
-01-benchmark_pdbs/
-01-benchmark_pdbs/
+
+# dMaSIF
+surface_data
+.vscode
+runs
 shape_index/
 masif_preds/
 runs/
@@ -13,6 +13,179 @@ NeurIPS_2020_benchmarks/
 *.out
 figures/
 timings/
-data_analysis/roc_curves
-data_analysis/.ipynb_checkpoints/
-.ipynb_checkpoints/
\ No newline at end of file
+data_analysis
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
index f53d0fb..85df565 100644
--- a/README.md
+++ b/README.md
@@ -44,16 +44,16 @@
 Models have been trained on either a single NVIDIA RTX 2080 Ti or a single Tesla V100.
 
 Scripts have been tested using the following two sets of core dependencies:
 
-| Dependency | First Option | Second Option |
-| ------------- | ------------- | ------------- |
-| GCC | 7.5.0 | 8.4.0 |
-| CMAKE | 3.10.2 | 3.16.5 |
-| CUDA | 10.0.130 | 10.2.89 |
-| cuDNN | 7.6.4.38 | 7.6.5.32 |
-| Python | 3.6.9 | 3.7.7 |
-| PyTorch | 1.4.0 | 1.6.0 |
-| PyKeops | 1.4 | 1.4.1 |
-| PyTorch Geometric | 1.5.0 | 1.6.1 |
+| Dependency | First Option | Second Option | Updated Version |
+| ------------- | ------------- | ------------- | ------------- |
+| GCC | 7.5.0 | 8.4.0 | 9.2.0 |
+| CMAKE | 3.10.2 | 3.16.5 | 3.22.2 |
+| CUDA | 10.0.130 | 10.2.89 | 11.7 |
+| cuDNN | 7.6.4.38 | 7.6.5.32 | 7.6.x |
+| Python | 3.6.9 | 3.7.7 | 3.8.16 |
+| PyTorch | 1.4.0 | 1.6.0 | 1.13.1 |
+| PyKeops | 1.4 | 1.4.1 | 2.1.1 |
+| PyTorch Geometric | 1.5.0 | 1.6.1 | 2.2.0 |
 
 ## Code overview
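The new "Updated Version" column can be checked from inside Python before launching anything heavy. A minimal sketch using only standard version attributes; the printed values should match the table above (cuDNN reports an integer such as 7605 for 7.6.5):

    import sys
    import torch
    import pykeops
    import torch_geometric

    print("Python ", sys.version.split()[0])          # 3.8.16
    print("PyTorch", torch.__version__)               # 1.13.1
    print("CUDA   ", torch.version.cuda)              # 11.7
    print("cuDNN  ", torch.backends.cudnn.version())  # e.g. 7605
    print("PyKeOps", pykeops.__version__)             # 2.1.1
    print("PyG    ", torch_geometric.__version__)     # 2.2.0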
diff --git a/benchmark_models.py b/benchmark_models.py
index d689f5e..3f39a15 100644
--- a/benchmark_models.py
+++ b/benchmark_models.py
@@ -320,12 +320,12 @@ def load_mesh(self, xyz, triangles=None, normals=None, weights=None, batch=None)
 
         # 3.b) Pseudo-geodesic window:
         # Pseudo-geodesic squared distance:
-        rho2_ij = ((x_j - x_i) ** 2).sum(-1) * ((2 - (n_i | n_j)) ** 2)  # (N, N, 1)
+        rho2_ij = ((x_j - x_i) ** 2).sum(-1) * ((2 - (n_i | n_j)) ** 2)  # (N, N, 1) # eq (5) in paper
         # Gaussian window:
         window_ij = (-rho2_ij).exp()  # (N, N, 1)
 
         # 3.c) Coordinates in the (u, v) basis - not oriented yet:
-        X_ij = uv_i.matvecmult(x_j - x_i)  # (N, N, 2)
+        X_ij = uv_i.matvecmult(x_j - x_i)  # (N, 1, 6)*(N, N, 3) -> (N, N, 2)
 
         # 3.d) Local average in the tangent plane:
         orientation_weight_ij = window_ij * weights_j  # (N, N, 1)
diff --git a/data.py b/data.py
index f73fd14..2837258 100644
--- a/data.py
+++ b/data.py
@@ -217,7 +217,7 @@ def __init__(
         self.rand_rot1 = rand_rot1
         self.rand_rot2 = rand_rot2
 
-    def __inc__(self, key, value):
+    def __inc__(self, key, value, *args, **kwargs):
         if key == "face_p1":
             return self.xyz_p1.size(0)
         if key == "face_p2":
@@ -225,7 +225,7 @@
         else:
             return super(PairData, self).__inc__(key, value)
 
-    def __cat_dim__(self, key, value):
+    def __cat_dim__(self, key, value, *args, **kwargs):
         if ("index" in key) or ("face" in key):
             return 1
         else:
             return super(PairData, self).__cat_dim__(key, value)
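The extra `*args, **kwargs` mirror the PyTorch Geometric 2.x API: `Data.__inc__` and `Data.__cat_dim__` are now invoked with additional positional arguments during batching, so 1.x-style two-argument overrides fail with a TypeError. A minimal sketch of the pattern, using a toy class and illustrative field names:

    import torch
    from torch_geometric.data import Data

    class ToyPair(Data):
        # PyG 2.x may pass extra arguments (e.g. the storage holding `value`);
        # absorbing them keeps the override forward-compatible.
        def __inc__(self, key, value, *args, **kwargs):
            if key == "face_p1":
                return self.xyz_p1.size(0)  # offset face indices by the point count
            return super().__inc__(key, value, *args, **kwargs)

        def __cat_dim__(self, key, value, *args, **kwargs):
            # index-like tensors concatenate along dim 1, node tensors along dim 0
            return 1 if ("index" in key) or ("face" in key) else 0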
diff --git a/data_iteration.py b/data_iteration.py
index e495cc4..9765012 100644
--- a/data_iteration.py
+++ b/data_iteration.py
@@ -321,7 +321,7 @@ def iterate(
         if not args.single_protein:
             P2["rand_rot"] = torch.eye(3, device=P2["xyz"].device)
             P2["atom_center"] = torch.zeros((1, 3), device=P2["xyz"].device)
-
+        torch.cuda.synchronize()
         prediction_time = time.time()
         outputs = net(P1, P2)
diff --git a/data_preprocessing/convert_pdb2npy.py b/data_preprocessing/convert_pdb2npy.py
index f53be54..df62a92 100644
--- a/data_preprocessing/convert_pdb2npy.py
+++ b/data_preprocessing/convert_pdb2npy.py
@@ -34,6 +34,10 @@ def load_structure_np(fname, center):
 def convert_pdbs(pdb_dir, npy_dir):
     print("Converting PDBs")
     for p in tqdm(pdb_dir.glob("*.pdb")):
-        protein = load_structure_np(p, center=False)
+        try:
+            protein = load_structure_np(p, center=False)
+        except Exception:
+            print(p)
+            continue  # skip unparsable files instead of crashing on the saves below
         np.save(npy_dir / (p.stem + "_atomxyz.npy"), protein["xyz"])
         np.save(npy_dir / (p.stem + "_atomtypes.npy"), protein["types"])
diff --git a/data_preprocessing/convert_ply2npy.py b/data_preprocessing/convert_ply2npy.py
index 0b3d17d..c0911bc 100644
--- a/data_preprocessing/convert_ply2npy.py
+++ b/data_preprocessing/convert_ply2npy.py
@@ -42,7 +42,11 @@ def load_surface_np(fname, center):
 def convert_plys(ply_dir, npy_dir):
     print("Converting PLYs")
     for p in tqdm(ply_dir.glob("*.ply")):
-        protein = load_surface_np(p, center=False)
+        try:
+            protein = load_surface_np(p, center=False)
+        except Exception:
+            print(p)
+            continue  # skip unreadable meshes instead of crashing on the saves below
         np.save(npy_dir / (p.stem + "_xyz.npy"), protein["xyz"])
         np.save(npy_dir / (p.stem + "_triangles.npy"), protein["triangles"])
         np.save(npy_dir / (p.stem + "_features.npy"), protein["features"])
diff --git a/geometry_processing.py b/geometry_processing.py
index 501df57..e92674a 100644
--- a/geometry_processing.py
+++ b/geometry_processing.py
@@ -541,13 +541,12 @@ def curvatures(
 
     # (minus) Shape operator, i.e. the differential of the Gauss map:
     # = (PPt^-1 @ PQt) : simple estimation through linear regression
-    S = torch.solve(PQt, PPt).solution
+    S = torch.linalg.solve(PPt, PQt)
     a, b, c, d = S[:, 0, 0], S[:, 0, 1], S[:, 1, 0], S[:, 1, 1]  # (N,)
 
-    # Normalization
     mean_curvature = a + d
     gauss_curvature = a * d - b * c
-    features += [mean_curvature.clamp(-1, 1), gauss_curvature.clamp(-1, 1)]
+    features += [torch.nan_to_num(mean_curvature).clamp(-1, 1), torch.nan_to_num(gauss_curvature).clamp(-1, 1)]
 
     features = torch.stack(features, dim=-1)
     return features
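Two details in this hunk are worth spelling out. `torch.solve(B, A).solution` (removed in PyTorch 1.13, which the updated dependency column targets) returns X with A @ X = B, while `torch.linalg.solve` takes the coefficient matrix first; since the shape operator is PPt^-1 @ PQt, the replacement call is `torch.linalg.solve(PPt, PQt)`. The `torch.nan_to_num` wrappers then guard the clamp against NaNs from degenerate tangent systems. A quick self-contained check with random batched 2x2 systems and illustrative sizes:

    import torch

    N = 8  # mimic the (N, 2, 2) PPt / PQt blocks of the curvature estimator
    PPt = torch.eye(2) + 0.1 * torch.randn(N, 2, 2)  # well-conditioned batch
    PQt = torch.randn(N, 2, 2)

    S = torch.linalg.solve(PPt, PQt)                 # solves PPt @ S = PQt
    assert torch.allclose(PPt @ S, PQt, atol=1e-5)

    # NaNs (e.g. from singular neighborhoods) are zeroed before clamping:
    trace = torch.nan_to_num(S[:, 0, 0] + S[:, 1, 1]).clamp(-1, 1)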
diff --git a/main_training.py b/main_training.py
index f3c3b4b..ffbd166 100644
--- a/main_training.py
+++ b/main_training.py
@@ -15,6 +15,11 @@
 from helper import *
 from Arguments import parser
 
+import pykeops
+
+# Clean up the already compiled files
+pykeops.clean_pykeops()
+
 # Parse the arguments, prepare the TensorBoard writer:
 args = parser.parse_args()
 writer = SummaryWriter("runs/{}".format(args.experiment_name))
@@ -110,7 +115,7 @@
         dataloader = val_loader
     elif dataset_type == "Test":
         dataloader = test_loader
-
+
     # Perform one pass through the data:
     info = iterate(
         net,
diff --git a/models/dMaSIF_search_3layer_12A_16dim b/models/dMaSIF_search_3layer_12A_16dim
deleted file mode 100644
index 9dae9e0..0000000
Binary files a/models/dMaSIF_search_3layer_12A_16dim and /dev/null differ
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..b853079
--- /dev/null
+++ b/test.py
@@ -0,0 +1,52 @@
+# # Standard imports:
+# import numpy as np
+# import torch
+# from torch.utils.data import random_split
+# from torch_geometric.loader import DataLoader
+# from torch_geometric.transforms import Compose
+# from pathlib import Path
+
+# # Custom data loader and model:
+# from data import ProteinPairsSurfaces, PairData, CenterPairAtoms
+# from data import RandomRotationPairAtoms, NormalizeChemFeatures, iface_valid_filter
+# from helper import *
+# from Arguments import parser
+
+# # args
+# random_rotation = True
+# batch_size = 8
+# search = True
+# radius = 12.
+
+# # We load the train and test datasets.
+# # Random transforms, to ensure that no network/baseline overfits on pose parameters:
+# transformations = (
+#     Compose([NormalizeChemFeatures(), CenterPairAtoms(), RandomRotationPairAtoms()])
+#     if random_rotation
+#     else Compose([NormalizeChemFeatures()])
+# )
+
+# # PyTorch geometric expects an explicit list of "batched variables":
+# batch_vars = ["xyz_p1", "xyz_p2", "atom_coords_p1", "atom_coords_p2"]
+# # Load the train dataset:
+# train_dataset = ProteinPairsSurfaces(
+#     "surface_data", ppi=search, train=True, transform=transformations
+# )
+# # train_dataset = [data for data in train_dataset if iface_valid_filter(data)]
+# # train_loader = DataLoader(
+# #     train_dataset, batch_size=1, follow_batch=batch_vars, shuffle=True
+# # )
+# print("Preprocessing training dataset")
+
+# Testing PyKeops installation
+import pykeops
+
+# Changing verbose and mode
+pykeops.verbose = True
+pykeops.build_type = 'Debug'
+
+# Clean up the already compiled files
+pykeops.clean_pykeops()
+
+# Test Numpy integration
+pykeops.test_numpy_bindings()
\ No newline at end of file
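test.py only exercises the NumPy bridge, while training drives KeOps through PyTorch, so the torch-side smoke test is the more telling check; `pykeops.test_torch_bindings()` sits alongside `test_numpy_bindings()` in the same installation-check API. A short sketch, assuming PyKeOps and PyTorch are importable:

    import pykeops

    # Recompile from a clean slate, as main_training.py now does:
    pykeops.clean_pykeops()

    # Each test compiles and runs a tiny reduction through the given backend:
    pykeops.test_numpy_bindings()
    pykeops.test_torch_bindings()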
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..06915ee
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+module load gcc/9.2.0 cuda/11.7
+
+python -W ignore -u main_training.py --experiment_name dMaSIF_search_3layer_12A --batch_size 64 --embedding_layer dMaSIF --search True --device cuda:0 --random_rotation True --radius 12.0 --n_layers 3 --seed 0
\ No newline at end of file