diff --git a/README.md b/README.md index 5baccc9..0c54731 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ pip install pymissense Generate usage is: ``` -usage: pymissense [-h] [--pdbpath PDBPATH] [--maxacid MAXACID] uniprot_id output_path +usage: pymissense [-h] [--tsv TSV] [--pdbpath PDBPATH] [--maxacid MAXACID] uniprot_id output_path AlphaMissense plot and pdb generator @@ -39,13 +39,14 @@ positional arguments: options: -h, --help show this help message and exit + --tsv TSV You can provide the path to the tsv file if you want to skip the download. (default: None) --pdbpath PDBPATH If defined, it will write the pathogencity as bfactor in that PDB. If its not defined or not existing it will instead download the alphafold predicted PDB (default: None) - --maxacid MAXACID Maximum squence number to use. (default: None) + --maxacid MAXACID Maximum sequence number to use in the plot. (default: None) ``` You can provide the optional argument `--pdbpath` if you want to use an experimental PDB, otherwise it will instead download the alphafold predicted PDB. -For example, to reproduce [Figure 3D](https://www.science.org/doi/10.1126/science.adg7492#F3) (the middle one) and the generate the PDB shown in [Figure 3E](https://www.science.org/doi/10.1126/science.adg7492#F3) do: +For example, to reproduce [Figure 3D](https://www.science.org/doi/10.1126/science.adg7492#F3) (the middle one) and generate the PDB shown in [Figure 3E](https://www.science.org/doi/10.1126/science.adg7492#F3) do: ``` wget https://files.rcsb.org/download/7UPI.pdb diff --git a/missense/missense.py b/missense/missense.py index 7e7b95a..d77690c 100644 --- a/missense/missense.py +++ b/missense/missense.py @@ -401,11 +401,13 @@ def gen_image(pos_to_val) -> np.array: return img -def get_data_tuple(uniprot_id: str): +def get_data_tuple(uniprot_id: str, tsv_path: str = None): """ Extracts the raw data for the plot from the tsv file. 
""" - with open(os.path.join(tempfile.gettempdir(), "alpha.tsv"), encoding="utf-8") as f: + if tsv_path is None: + tsv_path = os.path.join(tempfile.gettempdir(), "alpha.tsv") + with open(tsv_path, encoding="utf-8") as f: doc = f.read() m = re.findall(uniprot_id.upper() + r"\t(.\d+.)\t(\d.\d+)", doc) pos_to_val = [] @@ -457,6 +459,13 @@ def create_parser() -> argparse.ArgumentParser: help="Output folder", ) + parser.add_argument( + "--tsv", + type=str, + help="You can provide the path to the tsv file if you want to skip the download.", + default=None + ) + parser.add_argument( "--pdbpath", type=str, @@ -547,8 +556,17 @@ def create_modified_pdb(img: np.array, uniprot_id: str, output_path: str, pdb_pt else: out_file.write(f'{line}') -def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): - download_missense_data() +def _run(uniprot_id: str, + output_path: str, + tsvpath: str, + pdbpath: str, + maxacid: int): + + if tsvpath is None or os.path.exists(tsvpath)==False: + tsvpath=None + download_missense_data() + + os.makedirs(output_path, exist_ok=True) chain = None @@ -559,7 +577,11 @@ def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): print(f"Cant find chain for {uniprot_id} in {pdbpath}") sys.exit(1) - pos_to_val = get_data_tuple(uniprot_id) + pos_to_val = get_data_tuple(uniprot_id=uniprot_id, tsv_path=tsvpath) + + if len(pos_to_val) == 0: + print(f"Could not find any data in the AlphaMissense database for uniprot id {uniprot_id}") + sys.exit(1) out_fig_pth = os.path.join(output_path, f"{uniprot_id}.pdf") img_raw_data = make_and_save_plot(pos_to_val, out_fig_pth, maxacid) @@ -570,7 +592,11 @@ def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int): def _main_(): args = create_parser().parse_args() - _run(args.uniprot_id, args.output_path, args.pdbpath, args.maxacid) + _run(uniprot_id=args.uniprot_id, + output_path=args.output_path, + pdbpath=args.pdbpath, + maxacid=args.maxacid, + tsvpath=args.tsv) diff --git 
a/tests/test_run.py b/tests/test_run.py index 3724f5f..29c5d9e 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -28,7 +28,35 @@ def test_pdf_and_and_are_generated(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=None + pdbpath=None, + tsvpath=None + ) + + self.assertEqual(True, os.path.exists(os.path.join(tmpdirname,"Q9UQ13.pdf"))) + self.assertEqual(True, os.path.exists(os.path.join(tmpdirname, "Q9UQ13-edit.pdb"))) + + def test_pdf_and_and_are_generated_non_existing_uniprot(self): + + shutil.copyfile(os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/alpha.tsv"), + os.path.join(tempfile.gettempdir(), "alpha.tsv")) + + with tempfile.TemporaryDirectory() as tmpdirname: + with self.assertRaises(SystemExit): + ms._run(uniprot_id="Q9UQ13234234", + output_path=tmpdirname, + maxacid=200, + pdbpath=None, + tsvpath=None + ) + + def test_pdf_and_and_are_generated_tsv_by_path(self): + + with tempfile.TemporaryDirectory() as tmpdirname: + ms._run(uniprot_id="Q9UQ13", + output_path=tmpdirname, + maxacid=200, + pdbpath=None, + tsvpath=os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/alpha.tsv") ) self.assertEqual(True, os.path.exists(os.path.join(tmpdirname,"Q9UQ13.pdf"))) @@ -43,7 +71,8 @@ def test_pdb_check_with_reference(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=None + pdbpath=None, + tsvpath=None ) ref_pth = os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13/Q9UQ13-edit.pdb") @@ -62,7 +91,8 @@ def test_pdb_check_with_reference_with_pdb(self): ms._run(uniprot_id="Q9UQ13", output_path=tmpdirname, maxacid=200, - pdbpath=os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13-with-pdb/7upi.pdb") + pdbpath=os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13-with-pdb/7upi.pdb"), + tsvpath=None ) ref_pth = os.path.join(os.path.dirname(__file__), "../resources/tests/Q9UQ13-with-pdb/Q9UQ13-edit.pdb")