Skip to content

Commit

Permalink
Merge pull request #2 from MPI-Dortmund/thorstenwagner-patch-1
Browse files Browse the repository at this point in the history
Add unit tests and automatic versioning + deployment
  • Loading branch information
thorstenwagner authored Sep 28, 2023
2 parents fb5ac98 + e428896 commit eae0343
Show file tree
Hide file tree
Showing 11 changed files with 33,242 additions and 28 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# CI workflow: lint and test on every push, publish to PyPI on tag pushes.
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        # Same major version as the deploy job so both jobs behave alike.
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint pytest pytest-coverage
          pip install .
      - name: Analysing the code with pylint
        # -E: report errors only, so style findings do not fail the build.
        run: |
          pylint -E $(git ls-files '*.py')
      - name: Tests
        run: |
          pytest -v --cov=./ --cov-report=xml --cov-config=.coveragerc
  deploy:
    # this will run when you have tagged a commit (by convention a tag
    # starting with "v*") and requires that you have put your twine API
    # key in your github secrets (see readme for details)
    needs: [build]
    runs-on: ubuntu-latest
    # Anchor on the ref prefix: a plain substring test for 'tags' would also
    # match branches whose name happens to contain "tags".
    if: startsWith(github.ref, 'refs/tags/')
    steps:
      - uses: actions/checkout@v3
        with:
          # Full history and tags, so a tag-derived version (setuptools_scm
          # is installed below) can be computed reliably.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -U setuptools setuptools_scm wheel twine build
      - name: Build and publish
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TWINE_API_KEY }}
        run: |
          git tag
          python -m build .
          twine upload dist/*
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ options:
--maxacid MAXACID Maximum squence number to use. (default: None)
```

You can provide the optional argument `--pdbpath` if you want to use an experimental PDB, otherwise it will instead download the alphafold predicted PDB. One example could be:
You can provide the optional argument `--pdbpath` if you want to use an experimental PDB, otherwise it will instead download the alphafold predicted PDB.

For example, to reproduce [Figure 3D](https://www.science.org/doi/10.1126/science.adg7492#F3) (the middle one) and generate the PDB shown in [Figure 3E](https://www.science.org/doi/10.1126/science.adg7492#F3), do:

Expand Down
63 changes: 38 additions & 25 deletions missense/missense.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,9 +405,9 @@ def get_data_tuple(uniprot_id: str):
"""
Extracts the raw data for the plot from the tsv file.
"""
with open(os.path.join(tempfile.gettempdir(), "alpha.tsv")) as f:
with open(os.path.join(tempfile.gettempdir(), "alpha.tsv"), encoding="utf-8") as f:
doc = f.read()
m = re.findall(uniprot_id.upper() + "\t(.\d+.)\t(\d.\d+)", doc)
m = re.findall(uniprot_id.upper() + r"\t(.\d+.)\t(\d.\d+)", doc)
pos_to_val = []

for g in m:
Expand All @@ -424,7 +424,7 @@ def download_missense_data():
'''
alphafile=os.path.join(tempfile.gettempdir(), "alpha.tsv")
if not os.path.exists(alphafile):
url = ("https://zenodo.org/record/8208688/files/AlphaMissense_aa_substitutions.tsv.gz?download=1/")
url = "https://zenodo.org/record/8208688/files/AlphaMissense_aa_substitutions.tsv.gz?download=1/"
filename = os.path.join(tempfile.gettempdir(), "alpha.tsv.gz")
print("Download to", filename, " ...")
urlretrieve(url, filename)
Expand All @@ -439,6 +439,9 @@ def download_missense_data():


def create_parser() -> argparse.ArgumentParser:
'''
Create the argument parser
'''
parser = argparse.ArgumentParser(
description="AlphaMissense plot and pdb generator",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
Expand Down Expand Up @@ -472,8 +475,12 @@ def create_parser() -> argparse.ArgumentParser:


def make_and_save_plot(pos_to_val, out_file: str, maxpos: int =None) -> np.array:
"""
Create the plot and save it to disk.
:return The raw data for the plot
"""
img = gen_image(pos_to_val)
fig, ax = pyplot.subplots(1, 1)
_, ax = pyplot.subplots(1, 1)

ax.imshow(img, aspect='auto', interpolation='none', cmap="bwr")

Expand All @@ -498,26 +505,32 @@ def make_and_save_plot(pos_to_val, out_file: str, maxpos: int =None) -> np.array
return img

def get_chain(uniprot_id, pdb_pth: str):
    '''
    Return the chain identifier that a PDB file associates with a UniProt ID.

    A PDB might have multiple chains, where only one belongs to a certain
    UNIPROT-ID. The file is searched for the first ``DBREF`` record that
    mentions the upper-cased ID, and the single character following the
    four-character entry code in that record is returned.

    :param uniprot_id: UniProt ID to look for (matched in upper case).
    :param pdb_pth: Path to the PDB file, read as UTF-8 text.
    :return: The one-character chain identifier of the first matching record.
    :raises IndexError: If no DBREF record mentions the ID (callers rely on
        this exception type).
    '''
    # Escape the ID so it is always matched literally, never as regex syntax.
    pattern = r"DBREF\s+.{4}\s(.).+" + re.escape(uniprot_id.upper())
    with open(pdb_pth, mode="rt", encoding="utf-8") as f:
        doc = f.read()
    first_hit = re.search(pattern, doc)
    if first_hit is None:
        raise IndexError(f"no DBREF record for {uniprot_id.upper()} in {pdb_pth}")
    return first_hit.group(1)

def create_modified_pdb(img: np.array, uniprot_id: str, output_path: str, pdb_pth=None, chain=None):
'''
Replaces the bfactor column with the patho score.
'''
if pdb_pth is not None and os.path.isfile(pdb_pth):
target_pdb = pdb_pth

else:
target_pdb = os.path.join(tempfile.gettempdir(), "AF.pdb")
api_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id.upper()}"
response = requests.get(api_url)
response = requests.get(api_url, timeout=20)
r = response.json()
urllib.request.urlretrieve(r[0]['pdbUrl'], target_pdb)

mean_per_pos = img.mean(axis=0)

with open(target_pdb) as f:
with open(output_path, 'w+') as out_file:
with open(target_pdb, encoding="utf-8") as f:
with open(output_path, 'w+', encoding="utf-8") as out_file:
for line in f:
if line.startswith("ATOM "):
if chain is not None and line[21] != chain:
Expand All @@ -534,33 +547,33 @@ def create_modified_pdb(img: np.array, uniprot_id: str, output_path: str, pdb_pt
else:
out_file.write(f'{line}')


def _main_():
args = create_parser().parse_args()
def _run(uniprot_id: str, output_path: str, pdbpath: str, maxacid: int):
    '''
    Run the whole pipeline for one UniProt ID.

    Makes sure the AlphaMissense table is available, writes the plot to
    ``<output_path>/<uniprot_id>.pdf`` and the modified PDB to
    ``<output_path>/<uniprot_id>-edit.pdb``.

    :param uniprot_id: UniProt ID to process.
    :param output_path: Output directory; created if it does not exist.
    :param pdbpath: Optional path to an existing PDB file, or None.
    :param maxacid: Passed on to make_and_save_plot as its ``maxpos`` argument.

    Exits the process with status 1 when a PDB file was given but no chain
    for the UniProt ID can be found in it.
    '''
    download_missense_data()
    os.makedirs(output_path, exist_ok=True)

    # Only if a PDB file was supplied and exists is a chain lookup needed.
    chain = None
    if pdbpath is not None and os.path.exists(pdbpath):
        try:
            chain = get_chain(uniprot_id, pdbpath)
        except IndexError:
            # get_chain signals "no matching DBREF record" with IndexError.
            print(f"Cant find chain for {uniprot_id} in {pdbpath}")
            sys.exit(1)

    pos_to_val = get_data_tuple(uniprot_id)

    out_fig_pth = os.path.join(output_path, f"{uniprot_id}.pdf")
    img_raw_data = make_and_save_plot(pos_to_val, out_fig_pth, maxacid)
    print(f"Save plot to {out_fig_pth}")

    out_pdb_pth = os.path.join(output_path, f'{uniprot_id}-edit.pdb')
    create_modified_pdb(img_raw_data, uniprot_id, out_pdb_pth, pdbpath, chain)
    print(f"Save modified PDB to {out_pdb_pth}")

def _main_():
    '''
    Command-line entry point: parse the arguments and pass them to _run.
    '''
    cli_args = create_parser().parse_args()
    _run(
        uniprot_id=cli_args.uniprot_id,
        output_path=cli_args.output_path,
        pdbpath=cli_args.pdbpath,
        maxacid=cli_args.maxacid,
    )




if __name__ == "__main__":
    # Run the command-line entry point exactly once when executed as a script.
    _main_()
Loading

0 comments on commit eae0343

Please sign in to comment.