Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pdb and setup fixes #317

Merged
merged 5 commits into the base branch
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ jobs:
conda info -a
conda create -q -n test-environment python=${{ matrix.python-version }} numpy scipy numba pandas matplotlib
source activate test-environment
- name: Run setup.py
- name: Install Hatch
uses: pypa/hatch@install
- name: Build and install package
run: |
pip install build
python setup.py sdist --formats=zip -k
python -m build
find ./dist -iname "*.zip" -print0 | xargs -0 pip install
hatch build
find ./dist -iname "*.tar.gz" -print0 | xargs -0 pip install
pip install codecov
- name: Download test files
run: |
Expand Down
18 changes: 7 additions & 11 deletions .github/workflows/build_test_and_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ jobs:
conda info -a
conda create -q -n test-environment python=${{ matrix.python-version }} numpy scipy numba pandas matplotlib
source activate test-environment
- name: Run setup.py
- name: Install Hatch
uses: pypa/hatch@install
- name: Build and install package
run: |
python setup.py sdist --formats=zip -k
find ./dist -iname "*.zip" -print0 | xargs -0 pip install
hatch build
find ./dist -iname "*.tar.gz" -print0 | xargs -0 pip install
pip install codecov
- name: Download test files
run: |
Expand All @@ -42,14 +44,8 @@ jobs:
with:
run: coverage run -m unittest discover -s test -p "Test*.py"
working-directory: ./ #optional
- name: Publish evcouplings to test PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.PYPI_ACCESS_TOKEN_TEST }}
repository_url: https://test.pypi.org/legacy/
- name: Publish evcouplings to PyPI
if: startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
uses: pypa/gh-action-pypi-publish@v1.9.0
with:
user: __token__
password: ${{ secrets.PYPI_ACCESS_TOKEN }}
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
*.ipynb_checkpoints*
notebooks_dev/*
evcouplings.egg-info/*
/dist/
3 changes: 0 additions & 3 deletions MANIFEST.in

This file was deleted.

38 changes: 26 additions & 12 deletions evcouplings/compare/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,9 @@ def __init__(self, filehandle, keep_full_data=False):
"_atom_site.pdbx_formal_charge": "charge",
}

HELIX_TARGET_COLS = {
# full list of conf types: https://mmcif.wwpdb.org/dictionaries/mmcif_ma.dic/Items/_struct_conf_type.id.html;
# mapping between file types: https://manpages.debian.org/unstable/dssp/mkdssp.1.en.html
CONF_TARGET_COLS = {
"_struct_conf.conf_type_id": "conformation_type",
"_struct_conf.id": "id",
# label_asym_id and label_seq_id are sufficient for merging to atom table;
Expand Down Expand Up @@ -508,11 +510,15 @@ def __init__(self, filehandle, keep_full_data=False):
# decode information into dataframe with BioPython helper method; note this section may not be
# present if no helices exist in the structure
try:
self.helix_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in HELIX_TARGET_COLS.items()
})
self.conf_table = pd.DataFrame({
name: _decode(data[source_column]) for source_column, name in CONF_TARGET_COLS.items()
}).query(
# there are a handful of PDB entries that have (probably wrong) secondary structure assignments
# extending over more than one segment (e.g. 2bp7, 2wjv), drop these rather than raising an error
"beg_label_asym_id == end_label_asym_id"
)
except KeyError:
self.helix_table = None
self.conf_table = None

# decode information into dataframe with BioPython helper method; note this section may not be
# present if no sheets exist in the structure
Expand All @@ -526,16 +532,23 @@ def __init__(self, filehandle, keep_full_data=False):
# create secondary structure table for merging to chain tables
# (will only contain helix/H and strand/E, coil/C will need to be filled in)
sse_raw = []
for sse_type, sse_table in [
("H", self.helix_table),
("E", self.sheet_table)
for sse_type, sse_table, sse_filter in [
("H", self.conf_table, "HELX"),
("E", self.sheet_table, None),
# also retrieve beta strands/bridges from conf_table if available
("E", self.conf_table, "STRN"),
]:
# skip if secondary structure element not present in PDB file at all
if sse_table is None:
continue

# filter table down to relevant entries for current secondary structure type
if sse_filter is not None:
sse_table = sse_table.query(
f"conformation_type.str.startswith('{sse_filter}')"
)

for _, row in sse_table.iterrows():
assert row.beg_label_asym_id == row.end_label_asym_id
for seq_id in range(row.beg_label_seq_id, row.end_label_seq_id + 1):
sse_raw.append({
"label_asym_id": row.beg_label_asym_id,
Expand Down Expand Up @@ -694,7 +707,7 @@ def get_chain(self, chain, model=0, is_author_id=True):
# create coordinate ID from author residue ID + insertion code
# (this should be unique and circumvents issues from 0 seqres values if selecting based on author chain ID)
coord_id=lambda df: df.auth_seq_id.astype(str) + df.insertion_code,
seqres_id=lambda df: df.label_seq_id.astype(str).replace("0", np.nan),
seqres_id=lambda df: df.label_seq_id.astype(str).replace("0", pd.NA).replace("", pd.NA),
one_letter_code=lambda df: df.label_comp_id.map(AA3_to_AA1, na_action="ignore"),
# note that MSE will now be labeled as HETATM, which was not the case with MMTF
hetatm=lambda df: df.record_type == "HETATM",
Expand All @@ -720,12 +733,13 @@ def get_chain(self, chain, model=0, is_author_id=True):
how="left"
)
else:
# initialize to pd.NA instead of np.nan or warning about assigning str to float64 column appears
res_sse = res.assign(
sec_struct_3state=np.nan
sec_struct_3state=pd.NA
)

res_sse.loc[
res_sse.sec_struct_3state.isnull() & (res_sse.label_seq_id > 0),
res_sse.sec_struct_3state.isnull() & res_sse.seqres_id.notnull(),
"sec_struct_3state"
] = "C"

Expand Down
64 changes: 64 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Packaging configuration for the evcouplings package (Hatchling build backend).
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "evcouplings"
# NOTE(review): the version is declared statically here. The original file ALSO
# declared a dynamic version source ([tool.hatch.version] pointing at
# evcouplings/__init__.py), which conflicts: hatchling requires a dynamic
# version to be listed in project.dynamic and absent from this table. The
# dynamic source was dropped as the lower-risk fix (the contents of
# evcouplings/__init__.py are not visible from here); if a single source of
# truth in __init__.py is preferred, instead remove this line, add
# dynamic = ["version"], and restore the [tool.hatch.version] table.
version = "0.2.1"
description = "A Framework for evolutionary couplings analysis"
readme = "README.md"
license = "MIT"
authors = [
    { name = "Thomas Hopf", email = "thomas.hopf@gmail.com" },
]
keywords = [
    "analysis",
    "couplings",
    "evolutionary",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
]
# Runtime dependencies; version pins reproduce the original constraints exactly.
dependencies = [
    "billiard",
    "biopython>=1.84",
    "bokeh",
    "click",
    "filelock",
    "jinja2",
    "matplotlib",
    "msgpack",
    "numba",
    "numpy",
    "pandas",
    "psutil",
    "requests",
    "ruamel.yaml<0.18",
    "scikit-learn",
    "scipy",
    "seaborn",
    "setuptools>=18.2",
]

# Console-script entry points installed with the package.
[project.scripts]
evcouplings = "evcouplings.utils.app:app"
evcouplings_dbupdate = "evcouplings.utils.update_database:app"
evcouplings_runcfg = "evcouplings.utils.pipeline:app"
evcouplings_summarize = "evcouplings.utils.summarize:app"

[project.urls]
Homepage = "https://github.com/debbiemarkslab/EVcouplings"

# Limit the sdist to the package sources.
[tool.hatch.build.targets.sdist]
include = [
    "/evcouplings",
]
17 changes: 0 additions & 17 deletions requirements.txt

This file was deleted.

105 changes: 0 additions & 105 deletions setup.py

This file was deleted.

Loading