Skip to content

Commit

Permalink
Merge pull request #1176 from haddocking/check_combination_chains
Browse files Browse the repository at this point in the history
refactored check for chain combinations
  • Loading branch information
mgiulini authored Dec 11, 2024
2 parents 0a4f4a8 + 3a68823 commit a4487ad
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 14 deletions.
16 changes: 3 additions & 13 deletions src/haddock/libs/libcns.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from haddock.libs.libfunc import false, true
from haddock.libs.libmath import RandomNumberGenerator
from haddock.libs.libontology import PDBFile
from haddock.libs.libpdb import check_combination_chains
from haddock.libs.libutil import transform_to_list


Expand Down Expand Up @@ -335,25 +336,14 @@ def prepare_cns_input(
# prepare chain/seg IDs
segid_str = ""
if native_segid:
chainid_list: list[str] = []
if isinstance(input_element, (list, tuple)):
for pdb in input_element:

segids, chains = libpdb.identify_chainseg(pdb.rel_path, sort=False)

chainsegs = sorted(list(set(segids) | set(chains)))
# check if any of chainsegs is already in chainid_list
if not identifier.endswith("scoring"):
if any(chainseg in chainid_list for chainseg in chainsegs):
raise ValueError(
f"Chain/seg IDs are not unique for pdbs {input_element}."
)
chainid_list.extend(chainsegs)
chainid_list = check_combination_chains(input_element)

for i, _chainseg in enumerate(chainid_list, start=1):
segid_str += write_eval_line(f"prot_segid_{i}", _chainseg)

else:
chainid_list: list[str] = []
segids, chains = libpdb.identify_chainseg(
input_element.rel_path, sort=False
)
Expand Down
17 changes: 16 additions & 1 deletion src/haddock/libs/libpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
Optional,
Union,
)
from haddock.libs.libio import working_directory
from haddock.libs.libio import working_directory, PDBFile
from haddock.libs.libutil import get_result_or_same_in_list, sort_numbered_paths


Expand Down Expand Up @@ -296,3 +296,18 @@ def read_RECORD_section(

read_chainids = partial(read_RECORD_section, section_slice=slc_chainid, func=list) # noqa: E501
read_segids = partial(read_RECORD_section, section_slice=slc_segid, func=list)


def check_combination_chains(combination: list[PDBFile]) -> list[str]:
    """Gather the chain/segment IDs of every pdb in a combination.

    Each pdb of ``combination`` is inspected with ``identify_chainseg``;
    the union of its segment IDs and chain IDs is sorted and appended to
    the running list of identifiers.

    Parameters
    ----------
    combination : list[PDBFile]
        The pdb files that are going to be combined. Only their
        ``rel_path`` attribute is read.

    Returns
    -------
    list[str]
        The identifiers of all pdbs, in input order of the pdbs and
        sorted within each pdb.

    Raises
    ------
    ValueError
        If an identifier of one pdb was already found in a previous pdb
        of the same combination.
    """
    collected: list[str] = []
    for model in combination:
        seg_ids, chain_ids = identify_chainseg(model.rel_path, sort=False)
        # merge segment and chain IDs, dropping duplicates within this pdb
        identifiers = sorted({*seg_ids, *chain_ids})
        # an identifier already collected from an earlier pdb is a clash
        for identifier in identifiers:
            if identifier in collected:
                raise ValueError(
                    f"Chain/seg IDs are not unique for pdbs {combination}."
                )
        collected += identifiers
    return collected
2 changes: 2 additions & 0 deletions src/haddock/modules/sampling/rigidbody/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from haddock.libs.libcns import prepare_cns_input
from haddock.libs.libontology import PDBFile
from haddock.libs.libparallel import GenericTask, Scheduler
from haddock.libs.libpdb import check_combination_chains
from haddock.libs.libsubprocess import CNSJob
from haddock.modules import get_engine
from haddock.modules.base_cns_module import BaseCNSModule
Expand Down Expand Up @@ -132,6 +133,7 @@ def prepare_cns_input_parallel(
_l = []
idx = 1
for combination in models_to_dock:
check_combination_chains(combination)
for _ in range(sampling_factor):
ambig_fname = (
ambig_fnames[idx - 1]
Expand Down
29 changes: 29 additions & 0 deletions tests/test_libpdb.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
"""Test lib PDB."""
from pathlib import Path
import pytest


from haddock.libs import libpdb
from haddock.libs.libio import PDBFile

from . import golden_data

chainC = [
'ATOM 3 CA ARG C 4 37.080 43.455 -3.421 1.00 0.00 C C ', # noqa: E501
Expand Down Expand Up @@ -31,3 +35,28 @@ def test_read_chain_ids(lines, expected):
def test_read_seg_ids(lines, expected):
result = libpdb.read_segids(lines)
assert result == expected


@pytest.fixture(name="wrong_rigid_molecules")
def fixture_wrong_rigidbody_molecules():
    """Provide the receptor/ligand pair used as the failing rigidbody input.

    Both molecules are loaded from the golden data directory, receptor
    first and ligand second.
    """
    filenames = ("protprot_complex_1.pdb", "protprot_complex_2.pdb")
    return [PDBFile(Path(golden_data, filename)) for filename in filenames]

@pytest.fixture(name="good_rigid_molecules")
def fixture_good_rigidbody_molecules():
    """Provide the receptor/ligand pair used as the valid rigidbody input.

    Both molecules are loaded from the golden data directory, receptor
    first and ligand second.
    """
    filenames = ("e2aP_1F3G_haddock.pdb", "hpr_ensemble_1_haddock.pdb")
    return [PDBFile(Path(golden_data, filename)) for filename in filenames]

def test_check_combination_chains(good_rigid_molecules, wrong_rigid_molecules):
    """Check the return value and the error case of check_combination_chains."""
    # the good molecules yield the chains of both inputs, in order
    assert libpdb.check_combination_chains(good_rigid_molecules) == ["A", "B"]
    # when input molecules share chains there should be a ValueError
    with pytest.raises(ValueError):
        libpdb.check_combination_chains(wrong_rigid_molecules)

0 comments on commit a4487ad

Please sign in to comment.