diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index cfe752d7..419fb0d5 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -12,10 +12,10 @@ jobs: shell: bash -l {0} steps: - uses: actions/checkout@v3 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.10' - name: Lint with flake8 run: | pip install flake8 diff --git a/.readthedocs.yml b/.readthedocs.yml index 0646f07d..9966bf6a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,9 +7,9 @@ version: 2 # Set the OS, Python version and other tools you might need build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: - python: "3.9" + python: "3.10" # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/LICENSE b/LICENSE index 5607977d..9d7da3c1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020-2022 Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity +Copyright (c) 2020-2025 Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/about.rst b/docs/about.rst index c38fcd99..404de572 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -27,14 +27,14 @@ All functionality is available as a python package. Citation ******** -If you publish about work for which PENSA was useful, please cite our preprint: +If you publish about work for which PENSA was useful, please cite our publication: - M. Vögele, N. J. Thomson, S. T. Truong, J. McAvity, U. Zachariae, R. O. Dror: - *Systematic Analysis of Biomolecular Conformational Ensembles with PENSA*. - `arXiv:2212.02714 [q-bio.BM] 2022 `_. + M. Vögele, N. J. Thomson, S. T. Truong, J. McAvity, U. Zachariae, R. O. 
Dror: + *Systematic analysis of biomolecular conformational ensembles with PENSA*. + `J. Chem. Phys. 162, 014101 (2025) `_. The reference for the software implementation itself is the following: - Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity: *PENSA*. Zenodo, 2024. http://doi.org/10.5281/zenodo.4362136 + Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity: *PENSA*. Zenodo, 2025. http://doi.org/10.5281/zenodo.4362136 To get the citation and DOI for a particular version, see `Zenodo `_. diff --git a/docs/conf.py b/docs/conf.py index 61625272..3d0748fc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = 'PENSA' -copyright = '2020-2024, Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity' +copyright = '2020-2025, Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity' author = 'Martin Vögele, Neil Thomson, Sang Truong, Jasper McAvity' diff --git a/docs/installation.rst b/docs/installation.rst index 411674ac..ec1c6371 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Create and activate a conda environment: .. 
code:: bash - conda create --name pensa python=3.9 numpy==1.22 scipy==1.9 pandas==1.4 matplotlib==3.5 MDAnalysis==2.2 cython biotite -c conda-forge -c conda-forge + conda create --name pensa python==3.10 scipy numpy pandas matplotlib MDAnalysis==2.8 deeptime biotite pip -c conda-forge conda activate pensa If you want to use PENSA with Jupyter notebooks: diff --git a/docs/requirements.txt b/docs/requirements.txt index 523dada8..8240fda2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,12 +7,12 @@ sphinxcontrib-htmlhelp sphinxcontrib-jsmath sphinxcontrib-qthelp sphinxcontrib-serializinghtml -numpy==1.22 -scipy==1.9 -pandas==1.4 -matplotlib==3.5 +numpy +scipy +pandas +matplotlib deeptime -MDAnalysis==2.2 +MDAnalysis==2.8 cython biotite -pensa==0.3.0 +pensa diff --git a/docs/tut-2-preprocessing.rst b/docs/tut-2-preprocessing.rst index 1f70a16f..eff32d78 100644 --- a/docs/tut-2-preprocessing.rst +++ b/docs/tut-2-preprocessing.rst @@ -10,19 +10,19 @@ Preprocessing extract_aligned_coordinates, extract_combined_grid -Coordinates -*********** +To work with the biomolecule's coordinates, it is often easier to first extract +them from the simulation, i.e., remove the solvent, lipids etc. If you would like +to calculate water or ion features, you need to calculate the corresponding density. +These preprocessing steps can be cumbersome, but you usually only do them +once and can then play with your data. -To work with the biomolecule's coordinates, we first need to extract them from -the simulation, i.e., remove the solvent, lipids etc. This is the hardest part -but you usually only have to do it once and can then play with your data. -Preprocessing can handle many common trajectory formats as it is based on -MDAnalysis. You can start by using the scripts provided in the PENSA repository. -Once you know how PENSA works, you can write your own scripts. 
+Based on MDAnalysis, PENSA's preprocessing functions can handle many common formats +of molecular simulation trajectories. You can start by using the scripts provided +in the PENSA repository. Once you know how PENSA works, you can write your own scripts. Files and Directories ---------------------- +********************** In the following, we define the necessary files. For each simulation, we need a reference file (.psf for AMBER), a PDB file, and the trajetory. @@ -71,8 +71,8 @@ will generate. os.makedirs(subdir) -Extracting Coordinates ----------------------- +Coordinates +*********** We have to ensure that from both simulations, we use the exact same parts of the receptor for the analysis. Often, this will be easy and you @@ -91,14 +91,24 @@ In the first case, we will extract all protein residues, assuming extract_coordinates(ref_file_a, pdb_file_a, trj_file_a, out_name_a+"_receptor", sel_base_a) extract_coordinates(ref_file_b, pdb_file_b, trj_file_b, out_name_b+"_receptor", sel_base_b) -In many cases, you probably have several runs of the same simulation -that you want to combine to one structural ensemble. This is why the -trajectory argument takes a list as arguments, e.g. +In some cases, you may have only one trajectory while in others, +you may have several runs of the same simulation that you want +to combine to one structural ensemble. +This is why the trajectory argument can be either a single string + +.. code:: python + + extract_coordinates( + 'system.psf', 'system.pdb', 'run1.nc', + 'receptor', 'protein', start_frame=1000 + ) + +... or a list of strings. .. 
code:: python extract_coordinates( - 'system.psf', 'system.pdb', ['run1.nc','run2.nc','run3.nc'], + 'system.psf', 'system.pdb', ['run1.nc','run2.nc','run3.nc'], 'receptor', 'protein', start_frame=1000 ) @@ -129,8 +139,8 @@ Here, we use selections based on the definitions of transmembrane helices in the sel_string_b = "protein and resnum "+resnums print('Selection B:\n', sel_string_b, '\n') # Extract the coordinates of the transmembrane region from the trajectory - extract_coordinates(ref_file_a, pdb_file_a, [trj_file_a], out_name_a+"_tm", sel_string_a) - extract_coordinates(ref_file_b, pdb_file_b, [trj_file_b], out_name_b+"_tm", sel_string_b) + extract_coordinates(ref_file_a, pdb_file_a, trj_file_a, out_name_a+"_tm", sel_string_a) + extract_coordinates(ref_file_b, pdb_file_b, trj_file_b, out_name_b+"_tm", sel_string_b) Loading from Multiple Simulations diff --git a/environment.yml b/environment.yml index 53d086f6..1d98214a 100644 --- a/environment.yml +++ b/environment.yml @@ -3,11 +3,12 @@ channels: - conda-forge - defaults dependencies: - - python==3.9 - - scipy==1.9 - - numpy==1.22 - - pandas==1.4 - - matplotlib==3.5 - - MDAnalysis==2.2 + - python==3.10 + - scipy + - numpy + - pandas + - matplotlib + - MDAnalysis==2.8 - deeptime - biotite + - pip diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..789d1d17 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,63 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pensa" +version = "0.6.0" +description = "exploratory analysis and comparison of biomolecular conformational ensembles." 
+authors = [ + {name = "Martin Voegele, Neil Thomson, Sang Truong, Jasper McAvity", email = "martinvoegele1989@gmail.com"} +] +license = {text = "MIT"} +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Chemistry", + "Topic :: Scientific/Engineering :: Physics", + "Topic :: Scientific/Engineering :: Visualization", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Medical Science Apps.", + "Topic :: Scientific/Engineering :: Molecular Dynamics", + "Topic :: Scientific/Engineering :: Computational Biology", +] +dependencies = [ + "numpy>=1.23", + "scipy", + "pandas", + "matplotlib", + "deeptime", + "MDAnalysis==2.8", + "biotite", + "gpcrmining", +] +requires-python = "==3.10.*" + +[project.urls] +Documentation = "https://pensa.readthedocs.io/en/latest/" +Source = "http://github.com/drorlab/pensa" + +[tool.setuptools] +# Include the license file in the wheel. 
+license-files = ["LICENSE"] + +[tool.setuptools.packages.find] +include = [ + "pensa", + "pensa.preprocessing", + "pensa.features", + "pensa.comparison", + "pensa.dimensionality", + "pensa.clusters", + "pensa.statesinfo" +] + + + + diff --git a/run-test.sb b/run-test.sb deleted file mode 100644 index 7f4705c3..00000000 --- a/run-test.sb +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -#SBATCH --time=24:00:00 -#SBATCH --mem=20G -#SBATCH --partition=rondror -#SBATCH --qos=high_p - -# Activate PENSA environment -source /home/users/mvoegele/miniconda3/etc/profile.d/conda.sh -conda activate /oak/stanford/groups/rondror/users/mvoegele/envs/pensa_dev - -# Run the tests -pytest - diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 3f48337d..00000000 --- a/setup.cfg +++ /dev/null @@ -1,3 +0,0 @@ -[metadata] -# Include the license file in the wheel. -license_files = LICENSE diff --git a/setup.py b/setup.py deleted file mode 100644 index f8ed5792..00000000 --- a/setup.py +++ /dev/null @@ -1,44 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='pensa', - version='0.5.0', - description='exploratory analysis and comparison of biomolecular conformational ensembles.', - url='http://github.com/drorlab/pensa', - author='Martin Voegele, Neil Thomson, Sang Truong, Jasper McAvity', - author_email='martinvoegele1989@gmail.com', - license='MIT', - packages=find_packages( - include=[ - 'pensa', - 'pensa.preprocessing', - 'pensa.features', - 'pensa.comparison', - 'pensa.dimensionality', - 'pensa.clusters', - 'pensa.statesinfo', - ] - ), - zip_safe=False, - install_requires=[ - 'numpy==1.22', # density functions in MDAnalysis 2 use np.histogramdd() with keyword normed which is deprecated in numpy 1.21 and removed in numpy 1.24 - 'scipy==1.9', - 'pandas==1.4', - 'matplotlib==3.5', - 'deeptime', - 'MDAnalysis==2.2', # some features we use will likely be removed in MDA 3 - 'biotite', - 'gpcrmining', - ], - classifiers=[ - # How mature is this project? 
Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 4 - Beta', - # license (should match "license" above) - 'License :: OSI Approved :: MIT License', - # Supported Python versions - 'Programming Language :: Python :: 3', - ], -)