Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added Interface coverage cutoff #342

Merged
merged 3 commits into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/arctic3d/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,14 @@
default=0,
)

# Minimum fraction of an interface's residues that must be present in the
# structure for the interface to be retained (compared against the coverage
# value, e.g. 0.66 for 2 residues out of 3).
# NOTE: argparse %-formats help strings, so a bare "%" (as in "(%)") raises
# ValueError when the help is rendered; the value is also a fraction, not a
# percentage, so the help text states that explicitly.
argument_parser.add_argument(
    "--int_cov_cutoff",
    help="Interface coverage cutoff (fraction between 0 and 1)",
    type=float,
    required=False,
    default=0.7,
)


def load_args(arguments):
"""
Expand Down Expand Up @@ -177,6 +185,7 @@ def main(
linkage_strategy,
threshold,
min_clust_size,
int_cov_cutoff,
log_level="DEBUG",
):
"""Main function."""
Expand Down Expand Up @@ -270,6 +279,7 @@ def main(
pdb_to_use=pdb_to_use,
chain_to_use=chain_to_use,
pdb_data=pdb_data_path,
int_cov_cutoff=int_cov_cutoff,
)

if pdb_f is None:
Expand All @@ -291,6 +301,7 @@ def main(
pdb_path=pdb_f,
linkage_strategy=linkage_strategy,
threshold=threshold,
int_cov_cutoff=int_cov_cutoff,
)

log.info(f"Clustered interfaces {cl_dict}")
Expand Down
8 changes: 6 additions & 2 deletions src/arctic3d/modules/cluster_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
log = logging.getLogger("arctic3d.log")


def cluster_interfaces(interface_dict, pdb_path, linkage_strategy, threshold):
def cluster_interfaces(
interface_dict, pdb_path, linkage_strategy, threshold, int_cov_cutoff=0.7
):
"""
Wrapper to call interface_matrix and clustering

Expand All @@ -20,14 +22,16 @@ def cluster_interfaces(interface_dict, pdb_path, linkage_strategy, threshold):
linkage strategy for clustering
threshold : float
threshold for clustering
int_cov_cutoff : float
interface coverage cutoff

Returns
-------
clustered_residues : dict
dictionary of the clustered interfaces
"""
filtered_interfaces, matrix_path = interface_matrix(
interface_dict, pdb_path
interface_dict, pdb_path, int_cov_cutoff
)
if len(filtered_interfaces) > 0:
clustered_residues = interface_clustering(
Expand Down
22 changes: 15 additions & 7 deletions src/arctic3d/modules/interface_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from scipy.spatial.distance import cdist

SIGMA = 1.9
INTERFACE_COV_CUTOFF = 0.7

log = logging.getLogger("arctic3d.log")

Expand Down Expand Up @@ -154,7 +153,7 @@ def get_unique_sorted_resids(interface_dict):
return int_resids


def filter_interfaces(interface_dict, pdb_resids):
def filter_interfaces(interface_dict, pdb_resids, int_cov_cutoff=0.7):
"""
Filters the interfaces according to the residues present in the pdb

Expand All @@ -166,24 +165,30 @@ def filter_interfaces(interface_dict, pdb_resids):
pdb_resids : np.array
residues present in the pdb

int_cov_cutoff : float
interface coverage cutoff

Returns
-------
retained_interfaces : dict
dictionary of the retained and filtered interfaces
example : interface_dict = {"a" : [1,2], "b" : [2,3,4], "c": [5,6,7]}
pdb_resids = np.array([3,4,5,6,7])
then, if INTERFACE_COV_CUTOFF < 0.66:
then, if int_cov_cutoff < 0.66:
retained_interfaces = {"b": [3,4], "c" : [5,6,7]}
else:
retained_interfaces = {"c" : [5,6,7]}
"""
log.debug("Filtering interface dictionary")
log.debug(
"Filtering interface dictionary "
f"with interface coverage cutoff = {int_cov_cutoff}"
)
retained_interfaces = {}
for key in interface_dict.keys():
coverage, filtered_interface = check_residues_coverage(
interface_dict[key], pdb_resids
)
if coverage > INTERFACE_COV_CUTOFF:
if coverage > int_cov_cutoff:
# formatting the interface name to avoid spaces
formatted_key = format_interface_name(key)
retained_interfaces[formatted_key] = filtered_interface
Expand All @@ -210,7 +215,7 @@ def format_interface_name(int_name):
return formatted_name


def interface_matrix(interface_dict, pdb_path):
def interface_matrix(interface_dict, pdb_path, int_cov_cutoff=0.7):
"""
Computes the interface matrix.

Expand All @@ -235,7 +240,10 @@ def interface_matrix(interface_dict, pdb_path):
raise Exception(f"pdb_path {pdb_path} does not exist")
mdu = mda.Universe(pdb_path)
pdb_resids = mdu.select_atoms("name CA").resids
retained_interfaces = filter_interfaces(interface_dict, pdb_resids)
retained_interfaces = filter_interfaces(
interface_dict, pdb_resids, int_cov_cutoff
)
print(f"retained_interfaces: {retained_interfaces}")
ret_keys = list(retained_interfaces.keys())
log.debug(f"Retained interfaces: {ret_keys}")
n_ret = len(ret_keys)
Expand Down
10 changes: 8 additions & 2 deletions src/arctic3d/modules/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ def unlink_files(suffix="pdb", to_exclude=None):
fpath.unlink()


def get_maxint_pdb(validated_pdbs, interface_residues):
def get_maxint_pdb(validated_pdbs, interface_residues, int_cov_cutoff=0.7):
"""
Get PDB ID that retains the most interfaces.

Expand All @@ -700,6 +700,8 @@ def get_maxint_pdb(validated_pdbs, interface_residues):
List of (pdb_f, hit) tuples
interface_residues : dict
Dictionary of all the interfaces (each one with its uniprot ID as key)
int_cov_cutoff : float
Interface coverage cutoff.

Returns
-------
Expand Down Expand Up @@ -730,7 +732,7 @@ def get_maxint_pdb(validated_pdbs, interface_residues):
selection_string = f"name CA and chainID {chain_id.upper()}"
pdb_resids = mdu.select_atoms(selection_string).resids
tmp_filtered_interfaces = filter_interfaces(
interface_residues, pdb_resids
interface_residues, pdb_resids, int_cov_cutoff
)
curr_nint = len(tmp_filtered_interfaces)
if curr_nint > max_nint: # update "best" hit
Expand Down Expand Up @@ -790,6 +792,7 @@ def get_best_pdb(
pdb_to_use=None,
chain_to_use=None,
pdb_data=None,
int_cov_cutoff=0.7,
):
"""
Get best PDB ID.
Expand All @@ -806,6 +809,8 @@ def get_best_pdb(
Chain id to be used.
pdb_data : Path or None
pdb json file for offline mode.
int_cov_cutoff : float
Interface coverage cutoff.

Returns
-------
Expand Down Expand Up @@ -845,6 +850,7 @@ def get_best_pdb(
pdb_f, cif_f, top_hit, filtered_interfaces = get_maxint_pdb(
validated_pdbs_and_cifs,
interface_residues,
int_cov_cutoff=int_cov_cutoff,
)

if pdb_f is None or cif_f is None:
Expand Down
8 changes: 7 additions & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_cli_empty():
ligand=None,
linkage_strategy=None,
threshold=None,
int_cov_cutoff=None,
min_clust_size=None,
)
# assert exit code
Expand All @@ -38,7 +39,12 @@ def test_cli_empty():


def test_cli_full():
"""Test main cli with uniprot ID with one interface."""
target_uniprot = "W5JXD7"
exp_dir = Path(f"arctic3d-{target_uniprot}")
# delete folder if exists
if exp_dir.exists():
shutil.rmtree(exp_dir)
start_cwd = os.getcwd()
exit_code = main(
input_arg=target_uniprot,
Expand All @@ -56,10 +62,10 @@ def test_cli_full():
linkage_strategy=None,
threshold=None,
min_clust_size=1,
int_cov_cutoff=0.7,
)
assert exit_code == 0
os.chdir(start_cwd)
exp_dir = Path(f"arctic3d-{target_uniprot}")
assert exp_dir.exists() is True
# Check that the log file has been created
assert Path(exp_dir, "arctic3d.log").exists()
Expand Down
10 changes: 10 additions & 0 deletions tests/test_interface_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,16 @@ def test_filter_interfaces(example_mdu, example_interface_dict):
example_interface_dict, pdb_resids
)
assert expected_filter_dict == observed_filter_dict
# lower int_cov_cutoff
expected_filter_dict = {
"int_1": [1, 2],
"int_2": [1, 2, 4],
"int_3": [250],
}
observed_filter_dict = filter_interfaces(
example_interface_dict, pdb_resids, int_cov_cutoff=0.4
)
assert expected_filter_dict == observed_filter_dict


def test_interface_matrix(example_interface_dict, example_pdbpath):
Expand Down