diff --git a/README.manual_run.md b/README.manual_run.md
new file mode 100644
index 0000000..231d11f
--- /dev/null
+++ b/README.manual_run.md
@@ -0,0 +1,377 @@
+# Running CATH-AlphaFlow-CLI - Step-by-step Tutorial
+## UCL CS HPC (Test on 100,000 structures)
+
+Initial Steps:
+
+- load the Python module - `module load python/3.8.5`
+- create a virtual environment - `python3 -m venv venv && . venv/bin/activate`
+- upgrade pip, wheel and pytest - `pip install --upgrade pip wheel pytest`
+- install the repo - `git clone git@github.com:UCLOrengoGroup/cath-alphaflow.git`
+- create the `.env` file (use prod settings) - see `config.env`
+- export the Oracle Instant Client - `export LD_LIBRARY_PATH=~/instantclient_21_7`
+
+## Retrieve list of 100k UniProt IDs from database
+
+Time: 7 minutes
+
+Command:
+
+```
+cath-af-cli create-dataset-uniprot-ids --uniprot_ids_csv af_uniprot_id_100k_from_g3d.list \
+    --dbname gene3d_21 \
+    --max_evalue 1e-50 \
+    --max_records 100000
+```
+
+Output: `af_uniprot_id_100k_from_g3d.list`
+
+## Run create-cath-dataset-from-db
+
+Time: 4 minutes
+
+Input: `af_uniprot_id_100k_from_g3d.list`
+
+Command:
+
+```
+cath-af-cli create-cath-dataset-from-db \
+    --csv_uniprot_ids af_uniprot_id_100k_from_g3d.list \
+    --dbname gene3d_21 \
+    --af_cath_annotations af_100k_cath_annotations.tsv \
+    --af_chainlist_ids af_100k_chainlist_ids.tsv \
+    --af_domainlist_ids af_100k_domainlist_ids.tsv \
+    --gene3d_crh_output af_100k.crh \
+    --csv_uniprot_md5 af_100k_md5.tsv
+```
+
+Output:
+
+- af_100k_cath_annotations.tsv
+- af_100k_chainlist_ids.tsv
+- af_100k_domainlist_ids.tsv
+- af_100k.crh
+- af_100k_md5.tsv
+
+## Create GCloud Manifest File
+
+Time: less than 1 min
+
+Input: `af_100k_chainlist_ids.tsv`
+
+Output: `af_100k_manifest.txt`
+
+```
+grep -v 'chain_id' af_100k_chainlist_ids.tsv | sort | uniq | tr -d '\r' | awk '{print "gs://public-datasets-deepmind-alphafold-v4/"$1".cif"}' > af_100k_manifest.txt
+```
+
+## Download structures from GCloud
+
+Time: 2 hrs 20 mins
+
+Input: `af_100k_manifest.txt`
+
+Output: `af_cif_raw` folder containing unchopped AlphaFold chains
+
+```
+mkdir af_cif_raw
+cat af_100k_manifest.txt | (/share/apps/google-cloud-sdk/bin/gsutil -o GSUtil:parallel_process_count=1 -m cp -I af_cif_raw/ || echo "Ignoring non-zero exit code: \$?")
+```
+
+This step reports the number of UniProt IDs that are not available in AlphaFoldDB:
+
+```
+CommandException: 6532 files/objects could not be transferred. Total downloaded CIF: 62,212
+```
+
+## Create new AF chain list after GCloud download
+
+Rationale:
+
+The `cif_to_fasta` module in `cath-af-cli` expects a list of existing files, so we need to feed it only the chains that are in AFDB and available on the filesystem.
+
+```
+cd af_cif_raw
+find *.cif > ../af_100k_chainlist_after_gsutil.txt
+cd ..
+sed -i 's/\.cif//g' af_100k_chainlist_after_gsutil.txt
+echo af_chain_id | cat - af_100k_chainlist_after_gsutil.txt > temp && mv temp af_100k_chainlist_after_gsutil.txt
+```
+
+## Convert CIF to FASTA
+
+Process: 62,212 CIF files -> 62,212 FASTA files + merged.fasta
+
+Thoughts: too slow on 1 CPU (roughly 30 mins per 10k sequences), so split the id list and parallelise.
+
+Time: 1 hr 9 mins
+
+Input:
+
+```
+af_cif_raw/
+af_100k_chainlist_after_gsutil.txt
+```
+
+Output:
+
+- `af_fasta_raw` - folder containing a FASTA file for each chain available on the filesystem
+- `merged.fasta` - a multi-FASTA file of all processed CIF files
+
+```
+mkdir af_fasta_raw
+split -l 10000 af_100k_chainlist_after_gsutil.txt splitlist
+find splitlista* | xargs -P4 -I XXX sh -c 'cath-af-cli convert-cif-to-fasta --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --fasta_out_dir af_fasta_raw/'
+```
+
+Careful: the module expects each chunk to have a header line, so every chunk after the first (which inherits the header from the original file) loses its first structure. A workaround sketch is shown below.
+
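+A minimal workaround sketch (assuming GNU sed; the `splitlist` chunk prefix matches the `split` command above): prepend the `af_chain_id` header to every chunk that does not already start with it, so no chunk loses its first entry.
+
+```
+# add the header in place to each headerless chunk
+for f in splitlista*; do
+    head -1 "$f" | grep -qx 'af_chain_id' || sed -i '1i af_chain_id' "$f"
+done
+```
+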
+## Create MD5 from FASTA
+
+Time: seconds
+
+Input: `merged.fasta`
+
+Output: `af_100k_cif_raw_md5.txt`
+
+```
+cath-af-cli create-md5 --fasta merged.fasta \
+    --uniprot_md5_csv af_100k_cif_raw_md5.txt
+```
+
+## Filter CRH
+
+Rationale:
+
+- Extract and further process only matches between the CIF MD5s and the chopping MD5s
+- Remove all CRH entries that rely on a missing AFDB entry
+
+Time: 1 second
+
+Issue: the files generated in step 6 contain carriage returns (`^M`) that need removing with `tr -d '\r'`.
+
+Temporary fix: strip these characters before running the grep commands.
+
+Command:
+
+```
+grep -v 'af_chain_id' af_100k_cif_raw_md5.txt | awk '{print $3}' | tr -d '\r' | grep -F -f - af_100k.crh > af_100k_crh_after_filtering.crh
+sort af_100k_crh_after_filtering.crh | uniq > af_100k_crh_after_filtering_uniq.crh
+awk '{print $1}' af_100k_crh_after_filtering_uniq.crh | grep -F -f - af_100k_cif_raw_md5.txt | awk '{print $1}' | grep -F -f - af_100k_domainlist_ids.tsv > af_100k_domainlist_after_md5_filter.txt
+echo af_domain_id | cat - af_100k_domainlist_after_md5_filter.txt > temp && mv temp af_100k_domainlist_after_md5_filter.txt
+```
+
+Output: `af_100k_crh_after_filtering.crh`
+
+## Chop CIF before optimisation
+
+### WARNING!!
+
+### Unnecessary step: `optimise-domain-boundaries` acts on chains, not on domains, so we can optimise first and chop once.
+
+Running via xargs with 12 processes on a uniq-ed CRH file and domain list (120,129 domains).
+
+Time: 1 hr 45 minutes
+
+Command:
+
+```
+split -l 7500 af_100k_domainlist_after_md5_filter.txt split_crh_uniq_ -a 5 -d
+find split_crh_uniq_000* | xargs -P12 -I XXX sh -c 'cath-af-cli chop-cif --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --cif_out_dir chopped_cif_before_optimization/'
+```
+
+Issues found: remove `af_domain_id` from the header (or add an exception to the program). The program skips the first entry as a header, but crashes when it cannot parse `af_domain_id`.
+
+Stats: chopped 120,129 domains
+
+## Optimise boundaries
+
+On 16 threads, 7,500 entries each.
+
+Time: 1 hr 1 min
+
+Comments/TODO:
+
+- Include the percentage of delta length (i.e. how much do the boundaries change?) - see the sketch after this section
+- Remove `af_domain_id` from the header; it crashes the process
+- `optimise-domain-boundaries` works on chains, not on domains
+
+The initial chopping is redundant; it is kept in this instance as it is interesting for debugging.
+
+Commands:
+
+```
+split -l 7500 af_100k_domainlist_after_md5_filter_uniq.txt split_af_100k -a 5 -d
+find split_af_100k000* | xargs -P16 -I XXX sh -c 'cath-af-cli optimise-domain-boundaries --af_domain_list XXX \
+    --af_chain_mmcif_dir af_cif_raw/ \
+    --af_domain_list_post_tailchop af_100k_domainlist_post_tailchop_XXX.txt \
+    --af_domain_mapping_post_tailchop af_100k_domain_mapping_post_tailchop_XXX.tsv \
+    --status_log optimise_boundaries_status_log_XXX'
+```
+
+Afterwards:
+
+- Concatenate logs
+- Concatenate af_domain_list_post_tailchop
+- Concatenate af_domain_mapping_post_tailchop
+- Remove duplicated headers
+
+```
+cat af_100k_domain_mapping_post_tailchop_split_af_100k000* | grep -v 'af_domain_id' > af_100k_domain_mapping_post_tailchop.tsv
+cat af_100k_domainlist_post_tailchop_split_af_100k000* | grep -v 'af_domain_id' > af_100k_domainlist_post_tailchop.txt
+cat optimise_boundaries_status_log_split_af_100k000* > optimise_boundaries_status_log
+rm af_100k_domain_mapping_post_tailchop_split_af_100k000*
+rm optimise_boundaries_status_log_split_af_100k000*
+rm af_100k_domainlist_post_tailchop_split_af_100k000*
+rm split_af_100k000*
+```
+
+Stats:
+
+- 84,604 domains have unchanged boundaries
+- 35,378 domains have adjusted boundaries
+- 129 domains have failed because they have no residues above pLDDT 70
+
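+A possible sketch for the delta-length TODO above (assuming AF domain IDs end in a chopping such as `AF-P12345-F1-model_v4/12-345`, with multiple segments joined by `_`; the parsing needs adjusting if the real ID format differs):
+
+```
+import csv
+
+def chopping_length(domain_id: str) -> int:
+    """Number of residues covered by the chopping part of an AF domain ID."""
+    chopping = domain_id.rsplit("/", 1)[1]
+    return sum(int(end) - int(start) + 1
+               for start, end in (seg.split("-") for seg in chopping.split("_")))
+
+with open("af_100k_domain_mapping_post_tailchop.tsv") as fh:
+    for orig_id, chopped_id in csv.reader(fh, delimiter="\t"):
+        delta_pct = 100 * (chopping_length(orig_id) - chopping_length(chopped_id)) / chopping_length(orig_id)
+        print(f"{orig_id}\t{delta_pct:.1f}")
+```
+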
+## Chop CIF files after boundaries optimisation
+
+Time: 1 hr 46 mins on 12 threads
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt af_100k_domainlist -a 5 -d
+find af_100k_domainlist00* | xargs -P12 -I XXX sh -c 'cath-af-cli chop-cif --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --cif_out_dir chopped_cif_after_optimisation/'
+```
+
+TODO: add a StatusLog to ALL modules.
+
+Chopped 120,112 domains with optimised boundaries.
+
+## Calculate pLDDT and LUR report
+
+Time: 59 mins on 16 threads
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt split_af_100k_plddt_ -a 5 -d
+find split_af_100k_plddt_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-cif-to-plddt-summary --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --plddt_stats_file af_100k_plddt_summary_XXX.tsv'
+cat af_100k_plddt_summary_split_af_100k_plddt_*.tsv > af_100k_plddt_summary.tsv
+```
+
+## Create DSSP Files
+
+Time: 4 hrs
+
+Requirements: the boost library; on the CS HPC it can be loaded as a module.
+
+```
+module load boost/1.71.0
+mkdir af_dssp_dir
+find split_af_100k_dssp_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-cif-to-dssp --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --dssp_out_dir af_dssp_dir/'
+```
+
+Comments: implement a skip if the output file is already present (see the sketch below); maybe switch to IUPRED or some other sequence-based secondary structure predictor.
+
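+A minimal sketch of the skip-if-present idea (assuming each input ID maps to one `<id>.dssp` file in `af_dssp_dir`; adjust the extension if the module names its outputs differently):
+
+```
+# rebuild the work list from the ids that do not yet have a DSSP file
+cat split_af_100k_dssp_000* | grep -v 'af_chain_id' | while read -r id; do
+    [ -f "af_dssp_dir/$id.dssp" ] || echo "$id"
+done > dssp_todo.txt
+```
+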
+## Convert DSSP to SSE summary
+
+Time: 4 mins
+
+Command:
+
+```
+find split_af_100k_dssp_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-dssp-to-sse-summary --dssp_dir af_dssp_dir/ \
+    --id_file XXX \
+    --sse_out_file af_100k_sse_XXX.tsv'
+```
+
+## Convert optimised domains to Foldseek DB
+
+Time: 2 hrs 7 mins on a single thread, RAM: 1 GB
+
+Command:
+
+```
+cath-af-cli convert-cif-to-foldseek-db --cif_dir chopped_cif_after_optimisation/ \
+    --fs_querydb_dir af_foldseek_db/ \
+    --fs_bin_path /SAN/biosciences/alphafold/foldseek/bin/foldseek
+```
+
+Comments:
+
+- Added an option to generate a database from only a few structures; it needs to be specified as a CLI option using id_list
+- Investigate why m_core on CS is not seen by Foldseek; increasing the available RAM speeds up the process significantly
+
+## Run Foldseek of Query_Domain_Structures against CATH_S95 Representatives
+
+Time: 2 hrs 30 mins
+
+Command:
+
+```
+cath-af-cli run-foldseek --fs_querydb af_foldseek_db/af_query_foldseek.db \
+    --fs_targetdb /SAN/cath/cath_v4_3_0/databases/foldseek/cath_s95/cath_s95_db \
+    --fs_bin_path /SAN/biosciences/alphafold/foldseek/bin/foldseek
+```
+
+## Convert Foldseek Results to Summary
+
+Time: 5 mins
+
+Command:
+
+```
+cath-af-cli convert-foldseek-output-to-summary --id_file af_100k_domainlist_post_tailchop.txt \
+    --fs_input_file fs_query_results.m8 \
+    --fs_results fs_query_results.txt
+```
+
+Starting from 110,250 unique domain IDs in the Foldseek output, 110,218 hits pass the thresholds.
+
+## Globularity prediction
+
+Time: 2 hrs 6 mins
+
+Split into 16 processes, processing 120,113 domains.
+
+Command:
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt split_af_100k_globularity_ -a 5 -d
+find split_af_100k_globularity_000* | xargs -P16 -I XXX sh -c 'cath-af-cli measure-globularity --af_domain_list XXX \
+    --af_chain_mmcif_dir af_cif_raw/ \
+    --af_domain_globularity af_100k_globularity_XXX.txt'
+cat af_100k_globularity_split_af_100k_globularity_000* > af_100k_globularity.txt
+rm split_af_100k_globularity_000*
+rm af_100k_globularity_split_af_100k_globularity_000*
+```
+
+As this was done in chunks, remove the 16 additional occurrences of the header from `af_100k_globularity.txt`.
+
+## Combine results
+
+TBD - one possible approach is sketched below.
+
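+A hedged sketch (assumptions: pandas is available; the per-chunk SSE files have been concatenated into `af_100k_sse.tsv`; all three summaries are tab-separated and share an `af_domain_id` column - check these against the real headers before use):
+
+```
+import pandas as pd
+
+# load the per-step summaries produced above
+plddt = pd.read_csv("af_100k_plddt_summary.tsv", sep="\t")
+sse = pd.read_csv("af_100k_sse.tsv", sep="\t")
+globularity = pd.read_csv("af_100k_globularity.txt", sep="\t")
+
+# outer-join on the shared domain id so domains missing from one step are kept
+combined = (plddt.merge(sse, on="af_domain_id", how="outer")
+                 .merge(globularity, on="af_domain_id", how="outer"))
+combined.to_csv("af_100k_combined_summary.tsv", sep="\t", index=False)
+```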
"--af_domain_globularity", + type=click.File("wt"), + required=True, + help="Output: CSV file for AF2 domain list including both globularity parameters", +) +@click.option( + "--distance_cutoff", + type=int, + required=False, + default=DEFAULT_GLOB_DISTANCE, + help=f"Distance cutoff for the packing density in Angstrom. (default: {DEFAULT_GLOB_DISTANCE})", +) +@click.option( + "--volume_resolution", + type=int, + required=False, + default=DEFAULT_GLOB_VOLUME, + help=f"The voxel resolution for approximating the protein volume. (default: {DEFAULT_GLOB_VOLUME})", +) +def measure_globularity( + af_domain_list, + af_chain_mmcif_dir, + af_domain_globularity, + distance_cutoff, + volume_resolution, + gzipped_af_chains, +): + "Checks the globularity of the AF domain" + + af_domain_list_reader = get_af_domain_id_reader(af_domain_list) + + af_globularity_writer = get_csv_dictwriter( + af_domain_globularity, + fieldnames=["af_domain_id", "packing_density", "normed_radius_gyration"], + ) + af_globularity_writer.writeheader() + + click.echo( + f"Checking globularity for AF domain" + f"(mmcif_dir={af_chain_mmcif_dir}, in_file={af_domain_list.name}, " + f"out_file={af_domain_globularity.name} ) ..." + ) + + for af_domain_id in af_domain_list_reader: + LOG.debug(f"Working on: {af_domain_id} ...") + + af_domain_packing_density = calculate_packing_density( + af_domain_id, af_chain_mmcif_dir, distance_cutoff,gzipped_af_chains + ) + + af_domain_normed_radius_gyration = calculate_normed_radius_of_gyration( + af_domain_id, af_chain_mmcif_dir,volume_resolution,gzipped_af_chains + ) + LOG.debug(f"Processed entry: {af_domain_id}\tpacking_density:{af_domain_packing_density}\tgiration:{af_domain_normed_radius_gyration}") + af_globularity_writer.writerow( + { + "af_domain_id": af_domain_id, + "packing_density": af_domain_packing_density, + "normed_radius_gyration": af_domain_normed_radius_gyration, + } + ) + + click.echo("DONE") + + +def calculate_packing_density( + af_domain_id: AFDomainID, + af_chain_mmcif_dir: Path, + distance_cutoff: int, + gzipped_af_chains, + *, + cif_filename=None, +) -> AFDomainID: + + chopping = af_domain_id.chopping + segments = chopping.segments + domain_residues = [] + for segment in segments: + for res in range(segment.start,segment.end): + domain_residues.append(res) + + + # create default filename + if cif_filename is None: + if gzipped_af_chains == False: + open_func = open + cif_filename = af_domain_id.af_chain_id + ".cif" + else: + open_func = gzip.open + cif_filename = af_domain_id.af_chain_id + ".cif.gz" + + cif_path = Path(af_chain_mmcif_dir, cif_filename) + + with open_func(f"{cif_path}", mode="rt") as cif_fh: + + structure = MMCIFParser(QUIET=1).get_structure("struc", cif_fh) + + # List of hydrophobic residues + hydrophobic_list=['GLY','ALA','LEU','ILE','VAL','PRO','PHE','MET','TRP'] + + neighbor_list_protein = [] + # Get the number of nearby residues for all hydrophobic residues in the domain + for residue_number in domain_residues: + residue = structure[0]["A"][residue_number] + if residue.get_resname() in hydrophobic_list and not residue.is_disordered(): + center_atoms = Selection.unfold_entities(residue, 'A') + atom_list = [atom for atom in structure.get_atoms()] + ns = NeighborSearch(atom_list) + + nearby_residues = {residue for center_atom in center_atoms + for residue in ns.search(center_atom.coord, distance_cutoff, 'R')} + neighbor_list_protein.append(len(nearby_residues)) + + return round(np.mean(neighbor_list_protein),3) + + +# Function to get the approximated Volume 
+
+
+# Function to get the approximated volume of the domain
+def get_volume(structure, domain_residues, volume_resolution):
+
+    reduced_coords = list()
+    for residue_number in domain_residues:
+        residue = structure[0]["A"][residue_number]
+        for atm in residue.get_atoms():
+            # snap each atom onto a voxel grid of the given resolution
+            x_reduced = int(atm.coord.tolist()[0] / volume_resolution)
+            y_reduced = int(atm.coord.tolist()[1] / volume_resolution)
+            z_reduced = int(atm.coord.tolist()[2] / volume_resolution)
+            reduced_coords.append([x_reduced, y_reduced, z_reduced])
+
+    # the volume is the number of occupied voxels times the voxel volume
+    unique_coords = [list(x) for x in set(tuple(x) for x in reduced_coords)]
+    volume = len(unique_coords) * volume_resolution**3
+
+    return volume
+
+
+# Function to get the normed radius of gyration
+def calculate_normed_radius_of_gyration(
+    af_domain_id: AFDomainID,
+    af_chain_mmcif_dir: Path,
+    volume_resolution: int,
+    gzipped_af_chains: bool,
+    *,
+    cif_filename=None,
+) -> float:
+
+    chopping = af_domain_id.chopping
+    segments = chopping.segments
+    domain_residues = []
+    for segment in segments:
+        # segment boundaries are inclusive, so keep the final residue
+        for res in range(segment.start, segment.end + 1):
+            domain_residues.append(res)
+
+    if cif_filename is None:
+        if gzipped_af_chains:
+            open_func = gzip.open
+            cif_filename = af_domain_id.af_chain_id + ".cif.gz"
+        else:
+            open_func = open
+            cif_filename = af_domain_id.af_chain_id + ".cif"
+
+    cif_path = Path(af_chain_mmcif_dir, cif_filename)
+
+    with open_func(cif_path, mode="rt") as cif_fh:
+        structure = MMCIFParser(QUIET=1).get_structure("struc", cif_fh)
+
+    coords = []
+    masses = []
+    # Fill the lists of coords and masses for all atoms in the domain
+    for residue_number in domain_residues:
+        residue = structure[0]["A"][residue_number]
+        for atom in residue:
+            coords.append(atom.coord.tolist())
+            if atom.element == "C":
+                masses.append(12.0107)
+            elif atom.element == "H":
+                # hydrogens are not considered for the radius of gyration
+                masses.append(0.0)
+            elif atom.element == "O":
+                masses.append(15.9994)
+            elif atom.element == "N":
+                masses.append(14.0067)
+            elif atom.element == "S":
+                masses.append(32.065)
+            else:
+                raise Exception(f"Protein contains unknown atom type {atom.element}")
+
+    # Calculate the radius of gyration: Rg^2 = sum(m * |r|^2) / M - |centre of mass|^2
+    mass_coords = [(m * i, m * j, m * k) for (i, j, k), m in zip(coords, masses)]
+    total_mass = sum(masses)
+    r_tmp = sum(
+        mi * i + mj * j + mk * k for (i, j, k), (mi, mj, mk) in zip(coords, mass_coords)
+    )
+    m_tmp = sum((sum(i) / total_mass) ** 2 for i in zip(*mass_coords))
+    radius_gyration = np.sqrt(r_tmp / total_mass - m_tmp)
+
+    # Radius of gyration for a sphere with the same volume as the protein,
+    # with R = (3 * V / (4 * pi)) ** (1/3)
+    volume = get_volume(structure, domain_residues, volume_resolution)
+    radius = np.cbrt(3 * volume / (4 * np.pi))
+    radius_gyration_sphere = np.sqrt(3 / 5 * radius**2)
+
+    # Return normed radius of gyration
+    return round(radius_gyration / radius_gyration_sphere, 3)
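+
+
+# Example invocation (a sketch; the flags are the click options defined above):
+#
+#   cath-af-cli measure-globularity \
+#       --af_domain_list af_100k_domainlist_post_tailchop.txt \
+#       --af_chain_mmcif_dir af_cif_raw/ \
+#       --af_domain_globularity af_100k_globularity.txt \
+#       --gzipped_af_chains True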
str = "CA", -): - """ - Returns the local pLDDT score for a given residue - """ - return structure[model_num][chain_id][residue_num][atom_type].get_bfactor() - - def cut_segment( structure, segment_to_cut: Segment, cutoff_plddt_score, cut_start, cut_end ) -> Segment: diff --git a/cath_alphaflow/constants.py b/cath_alphaflow/constants.py index 9f21ec9..564774e 100644 --- a/cath_alphaflow/constants.py +++ b/cath_alphaflow/constants.py @@ -16,3 +16,5 @@ VALID_CIF_SUFFIXES = [".cif", ".cif.gz"] STATUS_LOG_SUCCESS = "SUCCESS" STATUS_LOG_FAIL = "FAIL" +DEFAULT_GLOB_DISTANCE = 5 +DEFAULT_GLOB_VOLUME = 5 diff --git a/cath_alphaflow/seq_utils.py b/cath_alphaflow/seq_utils.py index f8dd479..25aa2ea 100644 --- a/cath_alphaflow/seq_utils.py +++ b/cath_alphaflow/seq_utils.py @@ -59,3 +59,16 @@ def combine_fasta_files(fasta_in_dir, fasta_out_file): if fasta_file.suffix in [".fasta", ".fa"]: for seq_record in SeqIO.parse(fasta_file, "fasta"): SeqIO.write(seq_record, fasta_out_fh, "fasta") + +def get_local_plddt_for_res( + structure, + residue_num: int, + *, + model_num: int = 0, + chain_id: str = "A", + atom_type: str = "CA", +): + """ + Returns the local pLDDT score for a given residue + """ + return structure[model_num][chain_id][residue_num][atom_type].get_bfactor()