diff --git a/projects/03/main.ipynb b/projects/03/main.ipynb
new file mode 100644
index 0000000..828c6c7
--- /dev/null
+++ b/projects/03/main.ipynb
@@ -0,0 +1,1416 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "42d306b0-e6aa-44ac-bc9b-ae0a55db2b05",
+ "metadata": {},
+ "source": [
+ "# Automated interaction statistics"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "41f41d4a-84bb-432a-b40f-66f9c8c5a9ec",
+ "metadata": {},
+ "source": [
+ "This notebook aims to provide an automated pipeline to generate interaction statistics for any protein with available protein:ligand complexes in the PDB. The only required parameter is the UniProt ID of the protein of interest."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "793e81c3-6c7b-4438-9539-3b9e9f3df8f5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "from pathlib import Path\n",
+ "import urllib\n",
+ "import zlib\n",
+ "\n",
+ "import biotite.database.rcsb as rcsb\n",
+ "import requests\n",
+ "from tqdm.auto import tqdm\n",
+ "\n",
+ "from plipify.core import Structure\n",
+ "from plipify.fingerprints import InteractionFingerprint"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "69599ad4-6860-401f-a941-62b789262dcd",
+ "metadata": {},
+ "source": [
+ "Besides the UniProt ID, one can further customize the PDB query for potential protein:ligand complexes, e.g., by the maximal number of mutations, the experimental method, the ligand type and the molecular weight range of the co-crystallized ligands."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "4a31b498-a319-456c-9c3b-7bb7d1f5870a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# UniProt accession of the protein of interest; the commented lines are alternative example targets.\n",
+ "uniprot_id = \"P00519\" # ABL1\n",
+ "#uniprot_id = \"P00533\" # EGFR\n",
+ "#uniprot_id = \"Q9NRG4\" # SMYD2\n",
+ "# Upper bound of the closed range (0, max_mutation_count) applied to entity_poly.rcsb_mutation_count.\n",
+ "max_mutation_count = 0\n",
+ "# Exact-match value for the exptl.method field of the PDB query.\n",
+ "experimental_method = \"X-RAY DIFFRACTION\"\n",
+ "# Exact-match value for the chem_comp.type field of the PDB query.\n",
+ "ligand_type = \"non-polymer\"\n",
+ "# Inclusive (min, max) range for chem_comp.formula_weight; also used to filter the ligands of each entry.\n",
+ "ligand_mw = (300, 500)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d5bc0894-ee0d-4f3c-a34f-3e1da054f6e6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# `_dh` is IPython's directory history; its last entry is used as the base directory.\n",
+ "HERE = Path(_dh[-1])\n",
+ "# Per-protein data directory (data/<uniprot_id>), created together with missing parents.\n",
+ "DATA = HERE / \"data\" / uniprot_id\n",
+ "DATA.mkdir(exist_ok=True, parents=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ffb5b46b-d08f-4f5b-b1d2-88cec903d7df",
+ "metadata": {},
+ "source": [
+ "## Collect relevant pdb codes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d3b130c8-bed6-49f4-bf46-2a607e1adfbf",
+ "metadata": {},
+ "source": [
+ "In the next step, biotite's [rcsb module](https://www.biotite-python.org/apidoc/biotite.database.rcsb.html#module-biotite.database.rcsb) will be used to query the PDB for relevant structures. The query is thereby customized by the parameters given above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "4d45bf5a-887a-40b7-8027-f55b67ac599e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Found 30 structures in the PDB.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Search criteria built from the parameters above; all of them have to match (operator=\"and\").\n",
+ "queries = [\n",
+ "    # reference sequence database and accession of the polymer entity\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_name\",\n",
+ "        exact_match=\"UniProt\",\n",
+ "    ),\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers.database_accession\",\n",
+ "        exact_match=uniprot_id,\n",
+ "    ),\n",
+ "    # mutation count of the polymer entity\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"entity_poly.rcsb_mutation_count\",\n",
+ "        range_closed=(0, max_mutation_count),\n",
+ "    ),\n",
+ "    # experimental method\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"exptl.method\",\n",
+ "        exact_match=experimental_method,\n",
+ "    ),\n",
+ "    # type and formula weight of the chemical component\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"chem_comp.type\",\n",
+ "        exact_match=ligand_type,\n",
+ "    ),\n",
+ "    rcsb.FieldQuery(\n",
+ "        \"chem_comp.formula_weight\",\n",
+ "        range_closed=ligand_mw,\n",
+ "    ),\n",
+ "]\n",
+ "\n",
+ "results = rcsb.search(\n",
+ "    rcsb.CompositeQuery(queries, operator=\"and\"),\n",
+ "    return_type=\"polymer_instance\",\n",
+ ")\n",
+ "# every result is split at \".\" into a PDB ID and a chain ID\n",
+ "pdbs = [\n",
+ "    {\"pdb_id\": pdb_id, \"chain\": chain}\n",
+ "    for pdb_id, chain in (result.split(\".\") for result in results)\n",
+ "]\n",
+ "\n",
+ "print(f\"Found {len(pdbs)} structures in the PDB.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "608f6a44-2008-416b-a0f7-b1f2ae45836a",
+ "metadata": {},
+ "source": [
+ "## Collect ligands bound to PDB structures"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a621980-edc0-47f8-9216-989e715f4731",
+ "metadata": {},
+ "source": [
+ "Currently, it's not possible to query the PDB about co-crystallized ligands in a programmatic fashion. Hence, we will perform manual requests and directly filter for ligands with a matching molecular weight."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "cbe65d45-0d5b-466e-a453-e07ed5fa4515",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e9628694bdc8490db6121878b561320c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/30 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Identified 44 potential protein:ligand complexes.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# RCSB Data API (GraphQL) endpoint; the query is passed as the URL parameter `query`\n",
+ "graphql_url = \"https://data.rcsb.org/graphql\"\n",
+ "# ligand IDs per PDB entry, so entries with several matching chains are requested only once\n",
+ "ligands_by_pdb_id = {}\n",
+ "potential_complexes = []\n",
+ "for pdb in tqdm(pdbs):\n",
+ "    pdb_id = pdb[\"pdb_id\"]\n",
+ "    if pdb_id not in ligands_by_pdb_id:\n",
+ "        query = '{entry(entry_id:\"' + pdb_id + '\"){nonpolymer_entities{nonpolymer_comp{chem_comp{id,formula_weight}}}}}'\n",
+ "        # requests takes care of URL-encoding the query\n",
+ "        response = requests.get(graphql_url, params={\"query\": query}, timeout=60)\n",
+ "        response.raise_for_status()  # fail loudly instead of parsing an error page\n",
+ "        # guard against a null `entry` or null `nonpolymer_entities` in the response\n",
+ "        entry = response.json()[\"data\"][\"entry\"] or {}\n",
+ "        chem_comps = [\n",
+ "            entity[\"nonpolymer_comp\"][\"chem_comp\"]\n",
+ "            for entity in entry.get(\"nonpolymer_entities\") or []\n",
+ "        ]\n",
+ "        # keep ligands with a known molecular weight inside the requested (inclusive) range\n",
+ "        ligands_by_pdb_id[pdb_id] = [\n",
+ "            chem_comp[\"id\"]\n",
+ "            for chem_comp in chem_comps\n",
+ "            if chem_comp[\"formula_weight\"] is not None\n",
+ "            and ligand_mw[0] <= chem_comp[\"formula_weight\"] <= ligand_mw[1]\n",
+ "        ]\n",
+ "    # collect chain:ligand pairs\n",
+ "    for ligand in ligands_by_pdb_id[pdb_id]:\n",
+ "        potential_complexes.append(\n",
+ "            {\"pdb_id\": pdb_id, \"chain\": pdb[\"chain\"], \"ligand\": ligand}\n",
+ "        )\n",
+ "\n",
+ "print(f\"Identified {len(potential_complexes)} potential protein:ligand complexes.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e559c3e9-f35f-47f0-b8d3-3206a7b1cb28",
+ "metadata": {},
+ "source": [
+ "## Download and prepare renumbered structures"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de3c635b-307c-43f2-aeae-88421b587ffa",
+ "metadata": {},
+ "source": [
+ "Structures of the same protein deposited in the PDB are not guaranteed to have the same residue numbering. However, a consistent numbering is required for plipify to compare interactions between different structures. Luckily, the Dunbrack Lab provides structures [online](http://dunbrack3.fccc.edu/PDBrenum/) that are renumbered according to the UniProt ID. The following code will download the respective structures and prepare a separate file for each chain:ligand combination."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "c0b741f5-7d47-4f16-a251-09b1fc580b85",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "70f2d81232b8486eafac643b03f49bf1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/44 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Generated separate PDB files for 40 protein:ligand complexes.\n"
+ ]
+ }
+ ],
+ "source": [
+ "renum_url = \"http://dunbrack3.fccc.edu/PDBrenum/output_PDB/{}_renum.pdb.gz\"\n",
+ "coordinate_records = (\"ATOM\", \"HETATM\", \"ANISOU\", \"TER\")\n",
+ "# decompressed lines per PDB entry, so an entry shared by several chain:ligand pairs is downloaded only once\n",
+ "renumbered_lines = {}\n",
+ "valid_complexes = []\n",
+ "for potential_complex in tqdm(potential_complexes):\n",
+ "    pdb_id = potential_complex[\"pdb_id\"]\n",
+ "    chain = potential_complex[\"chain\"]\n",
+ "    ligand = potential_complex[\"ligand\"]\n",
+ "    pdb_path = DATA / f\"{pdb_id}_{chain}_{ligand}.pdb\"\n",
+ "    if pdb_id not in renumbered_lines:\n",
+ "        response = requests.get(renum_url.format(pdb_id.lower()), timeout=60)\n",
+ "        response.raise_for_status()  # fail loudly instead of decompressing an error page\n",
+ "        # wbits=15+32 lets zlib detect the gzip header automatically\n",
+ "        text = zlib.decompress(response.content, 15 + 32).decode(\"utf-8\")\n",
+ "        renumbered_lines[pdb_id] = text.split(\"\\n\")\n",
+ "    kept_lines = []\n",
+ "    ligand_identified = False\n",
+ "    for line in renumbered_lines[pdb_id]:\n",
+ "        if line.startswith(coordinate_records):\n",
+ "            # a record too short to hold a chain ID (e.g. a bare \"TER\") cannot match\n",
+ "            if len(line) < 22 or line[21] != chain:  # filter for chain\n",
+ "                continue\n",
+ "            if line.startswith(\"HETATM\"):\n",
+ "                # filter for ligand, might be problematic for co-factors;\n",
+ "                # strip the padding of residue names shorter than three characters\n",
+ "                if line[17:20].strip() != ligand:\n",
+ "                    continue\n",
+ "                ligand_identified = True\n",
+ "        # coordinate records that passed the filters and all other records are kept\n",
+ "        kept_lines.append(line + \"\\n\")\n",
+ "    if not ligand_identified:\n",
+ "        # no file for structures without accompanying ligand; drop a stale one from an earlier run\n",
+ "        if pdb_path.exists():\n",
+ "            pdb_path.unlink()\n",
+ "        continue\n",
+ "    with open(pdb_path, \"w\") as pdb_file:\n",
+ "        pdb_file.writelines(kept_lines)\n",
+ "    potential_complex[\"file_path\"] = pdb_path\n",
+ "    valid_complexes.append(potential_complex)\n",
+ "\n",
+ "print(f\"Generated separate PDB files for {len(valid_complexes)} protein:ligand complexes.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "57060974-fb05-406a-88d8-a1c56587bcfc",
+ "metadata": {},
+ "source": [
+ "## Load structures"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1365b64-3abe-463f-b75c-058cf7e66239",
+ "metadata": {},
+ "source": [
+ "Next, all structures will be loaded into plipify and checked for the existence of multiple ligands. This can happen if the same ligand is present twice in the same chain, e.g. 5V3H."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "cd24540c-78f6-454b-b089-96e17721df35",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e97e3b04087a433ca05a45cecc2ea7fe",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/40 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "structures = []\n",
+ "for valid_complex in tqdm(valid_complexes):\n",
+ "    pdb_file_path = str(valid_complex[\"file_path\"])\n",
+ "    structure = Structure.from_pdbfile(pdb_file_path)\n",
+ "    # unexpected binding site counts are only reported, the structure is kept either way\n",
+ "    if len(structure.binding_sites) != 1:\n",
+ "        print(f\"{valid_complex['pdb_id']} chain {valid_complex['chain']} contains {len(structure.binding_sites)} binding sites but we want exactly one.\")\n",
+ "    structures.append(structure)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33ad836e-cec6-4e82-b16d-8ac0306cbb9c",
+ "metadata": {},
+ "source": [
+ "## Identify common residues"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7e3bf3e-1a25-4146-9978-8885b6ecc8fc",
+ "metadata": {},
+ "source": [
+ "Additionally, we will restrict the interaction analysis to the residues that are common to all structures."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "a3a982db-7b8d-4f62-a3c0-a802cddc5663",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Identified 202 residues common to all structures.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# an empty list would otherwise fail below with an unspecific error\n",
+ "if not structures:\n",
+ "    raise ValueError(\"No structures available to identify common residues!\")\n",
+ "\n",
+ "# one set of protein residue numbers per structure\n",
+ "resid_sets = [\n",
+ "    {residue.seq_index for residue in structure.residues if residue.is_protein()}\n",
+ "    for structure in structures\n",
+ "]\n",
+ "\n",
+ "common_resids = set.intersection(*resid_sets)\n",
+ "print(f\"Identified {len(common_resids)} residues common to all structures.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "096565f2-cda5-4c2c-948c-d23c585c3822",
+ "metadata": {},
+ "source": [
+ "## Generate pocket mappings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "47929e60-7674-453a-81c5-010d898304e3",
+ "metadata": {},
+ "source": [
+ "These common residues can be passed as pocket mappings to plipify."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "3ad51955-f265-4dd9-93a6-9f16f8168a6c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pocket_mappings = []\n",
+ "for structure in structures:\n",
+ "    # the chain ID is the second \"_\"-separated field of the structure identifier\n",
+ "    chain = structure.identifier.split(\"_\")[1]\n",
+ "    # every common residue is mapped onto itself within the chain of this structure\n",
+ "    pocket_mappings.append(\n",
+ "        {\n",
+ "            resid: {\"seq_index\": resid, \"chain\": chain}\n",
+ "            for resid in common_resids\n",
+ "        }\n",
+ "    )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07521b70-4cb6-4d17-864d-a6a4d06b2792",
+ "metadata": {},
+ "source": [
+ "## Generate fingerprint"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a074f4b-6a6a-4077-abae-f93c45e4d287",
+ "metadata": {},
+ "source": [
+ "Finally, the fingerprint will be generated."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "bb9269e5-1f03-45b7-9e05-005115ca867b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
| hydrophobic | hbond-don | hbond-acc | saltbridge | pistacking | pication | halogen |
\n",
+ " \n",
+ " 248 | \n",
+ " 26 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 256 | \n",
+ " 12 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 261 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 264 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 269 | \n",
+ " 13 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 271 | \n",
+ " 13 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 293 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 299 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 302 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 313 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 315 | \n",
+ " 18 | \n",
+ " 14 | \n",
+ " 14 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 316 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 317 | \n",
+ " 11 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 318 | \n",
+ " 0 | \n",
+ " 27 | \n",
+ " 9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 320 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 321 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 322 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 332 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 337 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 340 | \n",
+ " 11 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 341 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 359 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 361 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 367 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 370 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 380 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 381 | \n",
+ " 17 | \n",
+ " 16 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 382 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 415 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 429 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 432 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 433 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 435 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 462 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 465 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 468 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fingerprint_options = {\n",
+ "    \"labeled\": True,\n",
+ "    \"as_dataframe\": True,\n",
+ "    \"remove_non_interacting_residues\": True,\n",
+ "    \"remove_empty_interaction_types\": True,\n",
+ "    \"ensure_same_sequence\": False,\n",
+ "}\n",
+ "fp = InteractionFingerprint().calculate_fingerprint(\n",
+ "    structures,\n",
+ "    residue_indices=pocket_mappings,\n",
+ "    **fingerprint_options,\n",
+ ")\n",
+ "\n",
+ "# stop early if the fingerprint has no rows\n",
+ "if fp.values.shape[0] == 0:\n",
+ "    raise ValueError(\"Fingerprint is empty!\")\n",
+ "\n",
+ "fp.style.background_gradient(axis=None, cmap=\"YlGnBu\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "89faece2-75a3-4951-9e06-175e02c73c68",
+ "metadata": {},
+ "source": [
+ "## Apply KLIFS residue numbering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "7e27ce79-b702-46f7-bdcb-352140941825",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from opencadd.databases.klifs import setup_remote"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "4f26bc62-5348-444d-80ec-ba3fdce06b12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up remote session\n",
+ "klifs = setup_remote()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "107e1799-f206-4f56-9e31-34a5dee3c978",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NOTE(review): despite its name, this ID is passed to `by_structure_klifs_id`, i.e. it is used as a structure ID.\n",
+ "# NOTE(review): the value is hard-coded for ABL1 (see trailing comment) and presumably has to be changed together with `uniprot_id` - confirm.\n",
+ "klifs_kinase_id = 1061 # ABL1 4WA9\n",
+ "pocket = klifs.pockets.by_structure_klifs_id(klifs_kinase_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "147b1374-e06b-443a-b32c-7484154b8eb3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Lookup table from the residue IDs of the pocket to their KLIFS IDs;\n",
+ "# for duplicated residue IDs the first entry wins.\n",
+ "klifs_ids_by_resid = {}\n",
+ "for resid, klifs_id in zip(pocket[\"residue.id\"], pocket[\"residue.klifs_id\"]):\n",
+ "    klifs_ids_by_resid.setdefault(resid, klifs_id)\n",
+ "\n",
+ "# New labels only for fingerprint rows that are part of the pocket; other rows keep their index.\n",
+ "klifs_labels = {\n",
+ "    index: f\"KLI{int(klifs_ids_by_resid[str(index)]):02d}\"\n",
+ "    for index in fp.index\n",
+ "    if str(index) in klifs_ids_by_resid\n",
+ "}\n",
+ "# rename all rows at once instead of mutating the table in place row by row\n",
+ "fp = fp.rename(index=klifs_labels)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "27b56195-2db4-43fc-a3da-7d5b4653850c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " | hydrophobic | hbond-don | hbond-acc | saltbridge | pistacking | pication | halogen |
\n",
+ " \n",
+ " KLI03 | \n",
+ " 26 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI11 | \n",
+ " 12 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 261 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 264 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI15 | \n",
+ " 13 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " KLI17 | \n",
+ " 13 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI31 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI36 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI39 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI43 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI45 | \n",
+ " 18 | \n",
+ " 14 | \n",
+ " 14 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI46 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI47 | \n",
+ " 11 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI48 | \n",
+ " 0 | \n",
+ " 27 | \n",
+ " 9 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI50 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI51 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI52 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 332 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 337 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 340 | \n",
+ " 11 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 341 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI66 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI68 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI74 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI77 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI80 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI81 | \n",
+ " 17 | \n",
+ " 16 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " KLI82 | \n",
+ " 21 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 415 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 429 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 432 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 433 | \n",
+ " 6 | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 435 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 462 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 465 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 468 | \n",
+ " 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fp.style.background_gradient(axis=None, cmap=\"YlGnBu\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}