From 4578c43c051c0ab54e81299b7b4e985566fedad6 Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:14:00 +0100 Subject: [PATCH 01/12] Add cli option for plddt and LUR module --- cath_alphaflow/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cath_alphaflow/cli.py b/cath_alphaflow/cli.py index fdf8c33..59de3b9 100644 --- a/cath_alphaflow/cli.py +++ b/cath_alphaflow/cli.py @@ -7,6 +7,7 @@ from .commands import optimise_domain_boundaries from .commands import convert_dssp_to_sse_summary from .commands import convert_cif_to_dssp +from .commands import extract_plddt_and_lur logging.basicConfig( level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s" @@ -47,3 +48,4 @@ def dump_config(): cli.add_command(optimise_domain_boundaries.optimise_domain_boundaries) cli.add_command(convert_dssp_to_sse_summary.convert_dssp_to_sse_summary) cli.add_command(convert_cif_to_dssp.convert_cif_to_dssp) +cli.add_command(extract_plddt_and_lur.convert_cif_to_plddt_summary) From 3e13468756585ccc5af884173191b213bad0818a Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:14:46 +0100 Subject: [PATCH 02/12] Add minimum length for LUR as constant --- cath_alphaflow/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cath_alphaflow/constants.py b/cath_alphaflow/constants.py index 255a607..46b5a42 100644 --- a/cath_alphaflow/constants.py +++ b/cath_alphaflow/constants.py @@ -2,3 +2,4 @@ DEFAULT_DSSP_SUFFIX = ".dssp" DEFAULT_HELIX_MIN_LENGTH = 3 DEFAULT_STRAND_MIN_LENGTH = 2 +MIN_LENGTH_LUR = 5 From 63f56998129f060e77f1e25a69a889fcb4533579 Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:15:05 +0100 Subject: [PATCH 03/12] Add plddt summary writer --- cath_alphaflow/io_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cath_alphaflow/io_utils.py b/cath_alphaflow/io_utils.py index c99d58e..62caa1e 100644 --- a/cath_alphaflow/io_utils.py +++ b/cath_alphaflow/io_utils.py @@ -53,6 +53,19 @@ def get_sse_summary_writer(csvfile): return writer +def get_plddt_summary_writer(csvfile): + writer = get_csv_dictwriter( + csvfile, + fieldnames=[ + "af_domain_id", + "avg_plddt", + "perc_LUR", + ], + ) + writer.writeheader() + return writer + + class AFDomainIDReader(csv.DictReader): def __init__(self, *args): self._seen_header = False From d20661d2a7fb949d516b4aa2c042f9c067ec935c Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:15:22 +0100 Subject: [PATCH 04/12] Add pLDDTSummary as dataclass --- cath_alphaflow/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cath_alphaflow/models.py b/cath_alphaflow/models.py index 6521079..4af2cd1 100644 --- a/cath_alphaflow/models.py +++ b/cath_alphaflow/models.py @@ -194,3 +194,10 @@ def new_from_dssp_str( ) return ss_sum + + +@dataclass +class pLDDTSummary: + af_domain_id: str + avg_plddt: float + perc_LUR: float From 8878e564bcfa4390a180b02a400c419619d5d9ff Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:16:06 +0100 Subject: [PATCH 05/12] Remove DEFAULTs for sse lengths, move to models --- cath_alphaflow/commands/convert_dssp_to_sse_summary.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cath_alphaflow/commands/convert_dssp_to_sse_summary.py b/cath_alphaflow/commands/convert_dssp_to_sse_summary.py index 46b5905..604d774 100644 --- a/cath_alphaflow/commands/convert_dssp_to_sse_summary.py +++ b/cath_alphaflow/commands/convert_dssp_to_sse_summary.py @@ -8,11 +8,7 @@ get_sse_summary_writer, ) from cath_alphaflow.models import SecStrSummary -from cath_alphaflow.constants import ( - DEFAULT_DSSP_SUFFIX, - DEFAULT_HELIX_MIN_LENGTH, - DEFAULT_STRAND_MIN_LENGTH, -) +from cath_alphaflow.constants import DEFAULT_DSSP_SUFFIX @click.command() @@ -60,8 +56,6 @@ def get_sse_summary_from_dssp( dssp_string = [] read_headers = False - domain_length = 0 - ss_total = 0 if acc_id is None: acc_id = dssp_path.stem From 367cd326708f3dfca5fdc7a40bc3e4c3dd2c37a0 Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:16:41 +0100 Subject: [PATCH 06/12] Add pLDDT and LUR module from CIF files to summary --- .../commands/extract_plddt_and_lur.py | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 cath_alphaflow/commands/extract_plddt_and_lur.py diff --git a/cath_alphaflow/commands/extract_plddt_and_lur.py b/cath_alphaflow/commands/extract_plddt_and_lur.py new file mode 100644 index 0000000..4322216 --- /dev/null +++ b/cath_alphaflow/commands/extract_plddt_and_lur.py @@ -0,0 +1,122 @@ +from pathlib import Path +from Bio.PDB import MMCIF2Dict +import logging +import click +from cath_alphaflow.io_utils import ( + yield_first_col, + get_plddt_summary_writer, +) +from cath_alphaflow.models import pLDDTSummary +from cath_alphaflow.constants import MIN_LENGTH_LUR + +LOG = logging.getLogger() + + +@click.command() +@click.option( + "--cif_in_dir", + type=click.Path(exists=True, file_okay=False, dir_okay=True, resolve_path=True), + required=True, + help="Input: directory of CIF files", +) +@click.option( + "--id_file", + type=click.File("rt"), + required=True, + help="Input: CSV file containing list of ids to process from CIF to pLDDT", +) +@click.option( + "--plddt_stats_file", + type=click.File("wt"), + required=True, + help="Output: pLDDT and LUR output file", +) +@click.option( + "--cif_suffix", + type=str, + default=".cif", + help="Option: suffix to use for mmCIF files (default: .cif)", +) +def convert_cif_to_plddt_summary( + cif_in_dir, + id_file, + plddt_stats_file, + cif_suffix, +): + "Creates summary of secondary structure elements (SSEs) from DSSP files" + + plddt_out_writer = get_plddt_summary_writer(plddt_stats_file) + + for file_stub in yield_first_col(id_file): + cif_path = Path(cif_in_dir) / f"{file_stub}{cif_suffix}" + if not cif_path.exists(): + msg = f"failed to locate CIF input file {cif_path}" + LOG.error(msg) + raise FileNotFoundError(msg) + + avg_plddt = get_average_plddt_from_plddt_string(cif_path) + perc_LUR = get_LUR_residues_percentage(cif_path) + plddt_stats = pLDDTSummary( + af_domain_id=file_stub, avg_plddt=avg_plddt, perc_LUR=perc_LUR + ) + plddt_out_writer.writerow(plddt_stats.__dict__) + + click.echo("DONE") + + +def get_average_plddt_from_plddt_string( + cif_path: Path, *, chopping=None, acc_id=None +) -> pLDDTSummary: + if acc_id is None: + acc_id = cif_path.stem + mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_path) + chain_plddt = mmcif_dict["_ma_qa_metric_global.metric_value"][0] + plddt_string = mmcif_dict["_ma_qa_metric_local.metric_value"] + segment_plddt = "" + if chopping: + for segment in chopping.segments: + segment_plddt += plddt_string[(segment.start - 1) : segment.end] + domain_length = len(segment_plddt) + average_plddt = round((sum(segment_plddt) / domain_length) * 100, 2) + + else: + average_plddt = chain_plddt + return average_plddt + + +def get_LUR_residues_percentage( + cif_path: Path, *, chopping=None, acc_id=None +) -> pLDDTSummary: + if acc_id is None: + acc_id = cif_path.stem + mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_path) + plddt_string = mmcif_dict["_ma_qa_metric_local.metric_value"] + segment_plddt = "" + if chopping: + for segment in chopping.segments: + segment_plddt += plddt_string[(segment.start - 1) : segment.end] + else: + segment_plddt = plddt_string + # Calculate LUR + LUR_perc = 0 + LUR_total = 0 + LUR_res = 0 + LUR_stretch = False + min_res_lur = MIN_LENGTH_LUR + for residue in segment_plddt: + plddt_res = float(residue) + if plddt_res < 90: + LUR_res += 1 + if LUR_stretch == True: + LUR_total += 1 + + if LUR_res == min_res_lur and LUR_stretch == False: + LUR_stretch = True + LUR_total += min_res_lur + + else: + LUR_stretch = False + LUR_res = 0 + LUR_perc = round(LUR_total / len(segment_plddt) * 100, 2) + + return LUR_perc From e82295017753d3df2c78f8c2da9d3f2fbde7754f Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Tue, 11 Oct 2022 17:39:39 +0100 Subject: [PATCH 07/12] move domain_length out of loop to solve average after loop --- cath_alphaflow/commands/extract_plddt_and_lur.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cath_alphaflow/commands/extract_plddt_and_lur.py b/cath_alphaflow/commands/extract_plddt_and_lur.py index 4322216..659db5e 100644 --- a/cath_alphaflow/commands/extract_plddt_and_lur.py +++ b/cath_alphaflow/commands/extract_plddt_and_lur.py @@ -76,8 +76,8 @@ def get_average_plddt_from_plddt_string( if chopping: for segment in chopping.segments: segment_plddt += plddt_string[(segment.start - 1) : segment.end] - domain_length = len(segment_plddt) - average_plddt = round((sum(segment_plddt) / domain_length) * 100, 2) + domain_length = len(segment_plddt) + average_plddt = round((sum(segment_plddt) / domain_length) * 100, 2) else: average_plddt = chain_plddt From 1b2c5f2a201ebabc16843c024e97f5a37db1e79e Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Wed, 12 Oct 2022 17:29:14 +0100 Subject: [PATCH 08/12] Add LUR residues and total_residues in pLDDTSummary --- cath_alphaflow/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cath_alphaflow/models.py b/cath_alphaflow/models.py index 4af2cd1..aedfafa 100644 --- a/cath_alphaflow/models.py +++ b/cath_alphaflow/models.py @@ -201,3 +201,5 @@ class pLDDTSummary: af_domain_id: str avg_plddt: float perc_LUR: float + LUR_residues: int + total_residues: int From d96a5115f2e1a9e893f15f28d75c0f6ad57ae2c0 Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Wed, 12 Oct 2022 17:32:02 +0100 Subject: [PATCH 09/12] Add gzip open to deal with gzipped cifs Fix LUR and pLDDT summary given a chopping segment. Change LUR threshold from 90 to 70 (residual from testing). Return LUR_perc,LUR_total and segment_length --- .../commands/extract_plddt_and_lur.py | 49 ++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/cath_alphaflow/commands/extract_plddt_and_lur.py b/cath_alphaflow/commands/extract_plddt_and_lur.py index 659db5e..78197e8 100644 --- a/cath_alphaflow/commands/extract_plddt_and_lur.py +++ b/cath_alphaflow/commands/extract_plddt_and_lur.py @@ -1,4 +1,5 @@ from pathlib import Path +import gzip from Bio.PDB import MMCIF2Dict import logging import click @@ -66,37 +67,51 @@ def convert_cif_to_plddt_summary( def get_average_plddt_from_plddt_string( cif_path: Path, *, chopping=None, acc_id=None -) -> pLDDTSummary: +) -> float: if acc_id is None: acc_id = cif_path.stem - mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_path) + open_func = open + if cif_path.name.endswith(".gz"): + open_func = gzip.open + with open_func(str(cif_path), mode="rt") as cif_fh: + mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_fh) chain_plddt = mmcif_dict["_ma_qa_metric_global.metric_value"][0] - plddt_string = mmcif_dict["_ma_qa_metric_local.metric_value"] - segment_plddt = "" + plddt_strings = mmcif_dict["_ma_qa_metric_local.metric_value"] + chopping_plddt = [] if chopping: for segment in chopping.segments: - segment_plddt += plddt_string[(segment.start - 1) : segment.end] - domain_length = len(segment_plddt) - average_plddt = round((sum(segment_plddt) / domain_length) * 100, 2) + segment_plddt = [ + float(plddt) + for plddt in plddt_strings[int(segment.start) - 1 : int(segment.end)] + ] + chopping_plddt += segment_plddt + domain_length = len(chopping_plddt) + average_plddt = round((sum(chopping_plddt) / domain_length), 2) else: average_plddt = chain_plddt return average_plddt -def get_LUR_residues_percentage( - cif_path: Path, *, chopping=None, acc_id=None -) -> pLDDTSummary: +def get_LUR_residues_percentage(cif_path: Path, *, chopping=None, acc_id=None): if acc_id is None: acc_id = cif_path.stem - mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_path) - plddt_string = mmcif_dict["_ma_qa_metric_local.metric_value"] - segment_plddt = "" + open_func = open + if cif_path.name.endswith(".gz"): + open_func = gzip.open + with open_func(str(cif_path), mode="rt") as cif_fh: + mmcif_dict = MMCIF2Dict.MMCIF2Dict(cif_fh) + plddt_strings = mmcif_dict["_ma_qa_metric_local.metric_value"] + chopping_plddt = [] if chopping: for segment in chopping.segments: - segment_plddt += plddt_string[(segment.start - 1) : segment.end] + segment_plddt = [ + float(plddt) + for plddt in plddt_strings[int(segment.start) - 1 : int(segment.end)] + ] + chopping_plddt += segment_plddt else: - segment_plddt = plddt_string + segment_plddt = plddt_strings # Calculate LUR LUR_perc = 0 LUR_total = 0 @@ -105,7 +120,7 @@ def get_LUR_residues_percentage( min_res_lur = MIN_LENGTH_LUR for residue in segment_plddt: plddt_res = float(residue) - if plddt_res < 90: + if plddt_res < 70: LUR_res += 1 if LUR_stretch == True: LUR_total += 1 @@ -119,4 +134,4 @@ def get_LUR_residues_percentage( LUR_res = 0 LUR_perc = round(LUR_total / len(segment_plddt) * 100, 2) - return LUR_perc + return LUR_perc, LUR_total, len(segment_plddt) From 86816d41a66d7fa40d76c494816d04d9566de89e Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Wed, 12 Oct 2022 17:32:25 +0100 Subject: [PATCH 10/12] Add unit tests for pLDDT and LUR summaries. --- tests/test_extract_plddt_and_lur.py | 71 +++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/test_extract_plddt_and_lur.py diff --git a/tests/test_extract_plddt_and_lur.py b/tests/test_extract_plddt_and_lur.py new file mode 100644 index 0000000..dbe271c --- /dev/null +++ b/tests/test_extract_plddt_and_lur.py @@ -0,0 +1,71 @@ +import os +from pathlib import Path +import csv +from symbol import comp_if +from click.testing import CliRunner +from cath_alphaflow.cli import cli +from cath_alphaflow.commands.extract_plddt_and_lur import ( + get_average_plddt_from_plddt_string, + get_LUR_residues_percentage, +) +from cath_alphaflow.models import Chopping, SecStrSummary, Segment + + +UNIPROT_IDS = ["P00520"] +FIXTURE_PATH = Path(__file__).parent / "fixtures" +EXAMPLE_CIF_FILE = FIXTURE_PATH / "cif" / "AF-P00520-F1-model_v3.cif.gz" + +SUBCOMMAND = "convert-cif-to-plddt-summary" + + +def test_cli_usage(): + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(cli, [SUBCOMMAND, "--help"]) + assert result.exit_code == 0 + assert "Usage:" in result.output + + +def create_fake_cif_path(dirname, cif_id, cif_src=EXAMPLE_CIF_FILE): + dir_path = Path(dirname) + dir_path.mkdir() + + cif_path_dest = dir_path / f"{cif_id}.cif.gz" + os.symlink(cif_src, f"{cif_path_dest}") + return cif_path_dest + + +def test_extract_plddt_summary(tmp_path): + acc_id = "test1" + cif_path = create_fake_cif_path(tmp_path.name, acc_id) + chopping = Chopping(segments=[Segment("10", "20")]) + + average_plddt = get_average_plddt_from_plddt_string( + cif_path, chopping=chopping, acc_id=acc_id + ) + + assert average_plddt == 33.71 + + chopping = Chopping(segments=[Segment("10", "20"), Segment("20", "35")]) + + average_plddt = get_average_plddt_from_plddt_string( + cif_path, chopping=chopping, acc_id=acc_id + ) + + assert average_plddt == 32.88 + + +def test_extract_LUR_summary(tmp_path): + acc_id = "test1" + cif_path = create_fake_cif_path(tmp_path.name, acc_id) + chopping = Chopping(segments=[Segment("10", "20")]) + + LUR = get_LUR_residues_percentage(cif_path, chopping=chopping, acc_id=acc_id) + + assert LUR == (100.0, 11, 11) + + chopping = Chopping(segments=[Segment("1", "200"), Segment("200", "1120")]) + + LUR = get_LUR_residues_percentage(cif_path, chopping=chopping, acc_id=acc_id) + + assert LUR == (57.87, 533, 921) From 2c2b57740b5e5518634350fec86f5c89d6fada7d Mon Sep 17 00:00:00 2001 From: Ian Sillitoe Date: Wed, 12 Oct 2022 17:58:43 +0100 Subject: [PATCH 11/12] add LUR summary as model, correct tests --- .../commands/extract_plddt_and_lur.py | 16 +++++---- cath_alphaflow/models.py | 11 +++++-- tests/test_extract_plddt_and_lur.py | 33 +++++++++++++++---- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/cath_alphaflow/commands/extract_plddt_and_lur.py b/cath_alphaflow/commands/extract_plddt_and_lur.py index 78197e8..96a5005 100644 --- a/cath_alphaflow/commands/extract_plddt_and_lur.py +++ b/cath_alphaflow/commands/extract_plddt_and_lur.py @@ -7,7 +7,7 @@ yield_first_col, get_plddt_summary_writer, ) -from cath_alphaflow.models import pLDDTSummary +from cath_alphaflow.models import LURSummary, pLDDTSummary from cath_alphaflow.constants import MIN_LENGTH_LUR LOG = logging.getLogger() @@ -56,9 +56,11 @@ def convert_cif_to_plddt_summary( raise FileNotFoundError(msg) avg_plddt = get_average_plddt_from_plddt_string(cif_path) - perc_LUR = get_LUR_residues_percentage(cif_path) + perc_LUR_summary = get_LUR_residues_percentage(cif_path) plddt_stats = pLDDTSummary( - af_domain_id=file_stub, avg_plddt=avg_plddt, perc_LUR=perc_LUR + af_domain_id=file_stub, + avg_plddt=avg_plddt, + perc_LUR=perc_LUR_summary.LUR_perc, ) plddt_out_writer.writerow(plddt_stats.__dict__) @@ -122,10 +124,10 @@ def get_LUR_residues_percentage(cif_path: Path, *, chopping=None, acc_id=None): plddt_res = float(residue) if plddt_res < 70: LUR_res += 1 - if LUR_stretch == True: + if LUR_stretch: LUR_total += 1 - if LUR_res == min_res_lur and LUR_stretch == False: + if LUR_res == min_res_lur and not LUR_stretch: LUR_stretch = True LUR_total += min_res_lur @@ -134,4 +136,6 @@ def get_LUR_residues_percentage(cif_path: Path, *, chopping=None, acc_id=None): LUR_res = 0 LUR_perc = round(LUR_total / len(segment_plddt) * 100, 2) - return LUR_perc, LUR_total, len(segment_plddt) + return LURSummary( + LUR_perc=LUR_perc, LUR_total=LUR_total, residues_total=len(segment_plddt) + ) diff --git a/cath_alphaflow/models.py b/cath_alphaflow/models.py index aedfafa..43bf794 100644 --- a/cath_alphaflow/models.py +++ b/cath_alphaflow/models.py @@ -124,6 +124,13 @@ def to_str(self): return self.af_domain_id +@dataclass +class LURSummary: + LUR_perc: float + LUR_total: int + residues_total: int + + @dataclass class SecStrSummary: af_domain_id: str @@ -170,13 +177,13 @@ def new_from_dssp_str( for residue in dssp_str: if residue == "H": sse_H_res += 1 - if sse_H_res >= min_helix_length and sse_H == False: + if sse_H_res >= min_helix_length and not sse_H: sse_H = True sse_H_num += 1 if residue == "E": sse_E_res += 1 - if sse_E_res >= min_strand_length and sse_E == False: + if sse_E_res >= min_strand_length and not sse_E: sse_E = True sse_E_num += 1 diff --git a/tests/test_extract_plddt_and_lur.py b/tests/test_extract_plddt_and_lur.py index dbe271c..0a1a66b 100644 --- a/tests/test_extract_plddt_and_lur.py +++ b/tests/test_extract_plddt_and_lur.py @@ -1,14 +1,12 @@ import os from pathlib import Path -import csv -from symbol import comp_if from click.testing import CliRunner from cath_alphaflow.cli import cli from cath_alphaflow.commands.extract_plddt_and_lur import ( get_average_plddt_from_plddt_string, get_LUR_residues_percentage, ) -from cath_alphaflow.models import Chopping, SecStrSummary, Segment +from cath_alphaflow.models import Chopping, LURSummary, Segment UNIPROT_IDS = ["P00520"] @@ -55,17 +53,38 @@ def test_extract_plddt_summary(tmp_path): assert average_plddt == 32.88 +def get_total_residues_from_chopping(chopping): + return sum([int(seg.end) - int(seg.start) + 1 for seg in chopping.segments]) + + def test_extract_LUR_summary(tmp_path): acc_id = "test1" cif_path = create_fake_cif_path(tmp_path.name, acc_id) chopping = Chopping(segments=[Segment("10", "20")]) - LUR = get_LUR_residues_percentage(cif_path, chopping=chopping, acc_id=acc_id) + lur_summary = get_LUR_residues_percentage( + cif_path, chopping=chopping, acc_id=acc_id + ) - assert LUR == (100.0, 11, 11) + assert lur_summary == LURSummary( + LUR_perc=100.0, + LUR_total=11, + residues_total=get_total_residues_from_chopping(chopping), + ) + # clean up after test + del chopping + del lur_summary chopping = Chopping(segments=[Segment("1", "200"), Segment("200", "1120")]) - LUR = get_LUR_residues_percentage(cif_path, chopping=chopping, acc_id=acc_id) + lur_summary = get_LUR_residues_percentage( + cif_path, chopping=chopping, acc_id=acc_id + ) - assert LUR == (57.87, 533, 921) + assert lur_summary == LURSummary( + LUR_perc=57.87, + LUR_total=533, + residues_total=get_total_residues_from_chopping(chopping), + ) + del chopping + del lur_summary From f974aeb930b8e0129d3e2b8bb9d0373bad02b98a Mon Sep 17 00:00:00 2001 From: Nicola Bordin Date: Wed, 12 Oct 2022 18:05:33 +0100 Subject: [PATCH 12/12] Change segment_plddt to chopping_plddt as length to check against. Adjusted values in test to reflect change. --- cath_alphaflow/commands/extract_plddt_and_lur.py | 6 +++--- tests/test_extract_plddt_and_lur.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cath_alphaflow/commands/extract_plddt_and_lur.py b/cath_alphaflow/commands/extract_plddt_and_lur.py index 96a5005..5fa2b8d 100644 --- a/cath_alphaflow/commands/extract_plddt_and_lur.py +++ b/cath_alphaflow/commands/extract_plddt_and_lur.py @@ -113,7 +113,7 @@ def get_LUR_residues_percentage(cif_path: Path, *, chopping=None, acc_id=None): ] chopping_plddt += segment_plddt else: - segment_plddt = plddt_strings + chopping_plddt = plddt_strings # Calculate LUR LUR_perc = 0 LUR_total = 0 @@ -134,8 +134,8 @@ def get_LUR_residues_percentage(cif_path: Path, *, chopping=None, acc_id=None): else: LUR_stretch = False LUR_res = 0 - LUR_perc = round(LUR_total / len(segment_plddt) * 100, 2) + LUR_perc = round(LUR_total / len(chopping_plddt) * 100, 2) return LURSummary( - LUR_perc=LUR_perc, LUR_total=LUR_total, residues_total=len(segment_plddt) + LUR_perc=LUR_perc, LUR_total=LUR_total, residues_total=len(chopping_plddt) ) diff --git a/tests/test_extract_plddt_and_lur.py b/tests/test_extract_plddt_and_lur.py index 0a1a66b..e90ec54 100644 --- a/tests/test_extract_plddt_and_lur.py +++ b/tests/test_extract_plddt_and_lur.py @@ -82,7 +82,7 @@ def test_extract_LUR_summary(tmp_path): ) assert lur_summary == LURSummary( - LUR_perc=57.87, + LUR_perc=47.55, LUR_total=533, residues_total=get_total_residues_from_chopping(chopping), )