diff --git a/README.manual_run.md b/README.manual_run.md
new file mode 100644
index 0000000..231d11f
--- /dev/null
+++ b/README.manual_run.md
@@ -0,0 +1,377 @@
+# Running CATH-AlphaFlow-CLI - Step-by-step Tutorial
+## UCL CS HPC (Test on 100,000 structures)
+
+Initial Steps:
+
+- load the Python module - `module load python/3.8.5`
+- create a virtual environment - `python3 -m venv venv && . venv/bin/activate`
+- upgrade pip, wheel and pytest - `pip install --upgrade pip wheel pytest`
+- install the repo - `git clone git@github.com:UCLOrengoGroup/cath-alphaflow.git`
+- create the `.env` file (use prod settings) - see `config.env`
+- export the Oracle Instant Client - `export LD_LIBRARY_PATH=~/instantclient_21_7`
+
+## Retrieve list of 100k UniProt IDs from database
+
+Time: 7 minutes
+
+Command:
+
+```
+cath-af-cli create-dataset-uniprot-ids --uniprot_ids_csv af_uniprot_id_100k_from_g3d.list \
+    --dbname gene3d_21 \
+    --max_evalue 1e-50 \
+    --max_records 100000
+```
+
+Output: `af_uniprot_id_100k_from_g3d.list`
+
+## Run create-cath-dataset-from-db
+
+Time: 4 minutes
+
+Input: `af_uniprot_id_100k_from_g3d.list`
+
+Command:
+
+```
+cath-af-cli create-cath-dataset-from-db \
+    --csv_uniprot_ids af_uniprot_id_100k_from_g3d.list \
+    --dbname gene3d_21 \
+    --af_cath_annotations af_100k_cath_annotations.tsv \
+    --af_chainlist_ids af_100k_chainlist_ids.tsv \
+    --af_domainlist_ids af_100k_domainlist_ids.tsv \
+    --gene3d_crh_output af_100k.crh \
+    --csv_uniprot_md5 af_100k_md5.tsv
+```
+
+Output:
+
+- af_100k_cath_annotations.tsv
+- af_100k_chainlist_ids.tsv
+- af_100k_domainlist_ids.tsv
+- af_100k.crh
+- af_100k_md5.tsv
+
+## Create GCloud Manifest File
+
+Time: less than 1 min
+
+Input: `af_100k_chainlist_ids.tsv`
+
+Output: `af_100k_manifest.txt`
+
+```
+grep -v 'chain_id' af_100k_chainlist_ids.tsv | sort | uniq | tr -d '\r' | awk '{print "gs://public-datasets-deepmind-alphafold-v4/"$1".cif"}' > af_100k_manifest.txt
+```
+
+## Download structures from GCloud
+
+Time: 2 hrs 20 mins
+
+Input: `af_100k_manifest.txt`
+
+Output: `af_cif_raw` folder containing unchopped AlphaFold chains
+
+```
+mkdir af_cif_raw
+cat af_100k_manifest.txt | (/share/apps/google-cloud-sdk/bin/gsutil -o GSUtil:parallel_process_count=1 -m cp -I af_cif_raw/ || echo "Ignoring non-zero exit code: \$?")
+```
+
+This step reports the number of UniProt IDs that are not available in AlphaFoldDB:
+
+```
+CommandException: 6532 files/objects could not be transferred. Total downloaded CIF: 62,212
+```
+
+## Create new AF chain list after GCloud download
+
+Rationale:
+
+The `cif_to_fasta` module in `cath-af-cli` expects a list of existing files, so we need to feed it only the chains that are in AFDB and available on the filesystem.
+
+```
+cd af_cif_raw
+find *.cif > ../af_100k_chainlist_after_gsutil.txt
+cd ..
+sed -i 's/\.cif//g' af_100k_chainlist_after_gsutil.txt
+echo af_chain_id | cat - af_100k_chainlist_after_gsutil.txt > temp && mv temp af_100k_chainlist_after_gsutil.txt
+```
+
+## Convert CIF to FASTA
+
+Process: 62,212 CIF files -> 62,212 FASTA files + merged.fasta
+
+Thoughts: too slow on 1 CPU (roughly 30 mins per 10k sequences), so split the id list and parallelise.
+
+Time: 1 hr 9 mins
+
+Input:
+
+```
+af_cif_raw/
+af_100k_chainlist_after_gsutil.txt
+```
+
+Output:
+
+- `af_fasta_raw` - folder containing a FASTA file for each chain available on the filesystem
+- `merged.fasta` - a multi-FASTA file of all processed CIF files
+
+```
+mkdir af_fasta_raw
+split -l 10000 af_100k_chainlist_after_gsutil.txt splitlist
+find splitlista* | xargs -P4 -I XXX sh -c 'cath-af-cli convert-cif-to-fasta --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --fasta_out_dir af_fasta_raw/'
+```
+
+Careful: the module expects each chunk to have a header line, so every chunk after the first (which inherits the header from the original file) loses its first structure. A workaround sketch is shown below.
+
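+A minimal workaround sketch (assuming GNU sed; the `splitlist` chunk prefix matches the `split` command above): prepend the `af_chain_id` header to every chunk that does not already start with it, so no chunk loses its first entry.
+
+```
+# add the header in place to each headerless chunk
+for f in splitlista*; do
+    head -1 "$f" | grep -qx 'af_chain_id' || sed -i '1i af_chain_id' "$f"
+done
+```
+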
+## Create MD5 from FASTA
+
+Time: seconds
+
+Input: `merged.fasta`
+
+Output: `af_100k_cif_raw_md5.txt`
+
+```
+cath-af-cli create-md5 --fasta merged.fasta \
+    --uniprot_md5_csv af_100k_cif_raw_md5.txt
+```
+
+## Filter CRH
+
+Rationale:
+
+- Extract and further process only matches between the CIF MD5s and the chopping MD5s
+- Remove all CRH entries that rely on a missing AFDB entry
+
+Time: 1 second
+
+Issue: the files generated in step 6 contain carriage returns (`^M`) that need removing with `tr -d '\r'`.
+
+Temporary fix: strip these characters before running the grep commands.
+
+Command:
+
+```
+grep -v 'af_chain_id' af_100k_cif_raw_md5.txt | awk '{print $3}' | tr -d '\r' | grep -F -f - af_100k.crh > af_100k_crh_after_filtering.crh
+sort af_100k_crh_after_filtering.crh | uniq > af_100k_crh_after_filtering_uniq.crh
+awk '{print $1}' af_100k_crh_after_filtering_uniq.crh | grep -F -f - af_100k_cif_raw_md5.txt | awk '{print $1}' | grep -F -f - af_100k_domainlist_ids.tsv > af_100k_domainlist_after_md5_filter.txt
+echo af_domain_id | cat - af_100k_domainlist_after_md5_filter.txt > temp && mv temp af_100k_domainlist_after_md5_filter.txt
+```
+
+Output: `af_100k_crh_after_filtering.crh`
+
+## Chop CIF before optimisation
+
+### WARNING!!
+
+### Unnecessary step: `optimise-domain-boundaries` acts on chains, not on domains, so we can optimise first and chop once.
+
+Running via xargs with 12 processes on a uniq-ed CRH file and domain list (120,129 domains).
+
+Time: 1 hr 45 minutes
+
+Command:
+
+```
+split -l 7500 af_100k_domainlist_after_md5_filter.txt split_crh_uniq_ -a 5 -d
+find split_crh_uniq_000* | xargs -P12 -I XXX sh -c 'cath-af-cli chop-cif --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --cif_out_dir chopped_cif_before_optimization/'
+```
+
+Issues found: remove `af_domain_id` from the header (or add an exception to the program). The program skips the first entry as a header, but crashes when it cannot parse `af_domain_id`.
+
+Stats: chopped 120,129 domains
+
+## Optimise boundaries
+
+On 16 threads, 7,500 entries each.
+
+Time: 1 hr 1 min
+
+Comments/TODO:
+
+- Include the percentage of delta length (i.e. how much do the boundaries change?) - see the sketch after this section
+- Remove `af_domain_id` from the header; it crashes the process
+- `optimise-domain-boundaries` works on chains, not on domains
+
+The initial chopping is redundant; it is kept in this instance as it is interesting for debugging.
+
+Commands:
+
+```
+split -l 7500 af_100k_domainlist_after_md5_filter_uniq.txt split_af_100k -a 5 -d
+find split_af_100k000* | xargs -P16 -I XXX sh -c 'cath-af-cli optimise-domain-boundaries --af_domain_list XXX \
+    --af_chain_mmcif_dir af_cif_raw/ \
+    --af_domain_list_post_tailchop af_100k_domainlist_post_tailchop_XXX.txt \
+    --af_domain_mapping_post_tailchop af_100k_domain_mapping_post_tailchop_XXX.tsv \
+    --status_log optimise_boundaries_status_log_XXX'
+```
+
+Afterwards:
+
+- Concatenate logs
+- Concatenate af_domain_list_post_tailchop
+- Concatenate af_domain_mapping_post_tailchop
+- Remove duplicated headers
+
+```
+cat af_100k_domain_mapping_post_tailchop_split_af_100k000* | grep -v 'af_domain_id' > af_100k_domain_mapping_post_tailchop.tsv
+cat af_100k_domainlist_post_tailchop_split_af_100k000* | grep -v 'af_domain_id' > af_100k_domainlist_post_tailchop.txt
+cat optimise_boundaries_status_log_split_af_100k000* > optimise_boundaries_status_log
+rm af_100k_domain_mapping_post_tailchop_split_af_100k000*
+rm optimise_boundaries_status_log_split_af_100k000*
+rm af_100k_domainlist_post_tailchop_split_af_100k000*
+rm split_af_100k000*
+```
+
+Stats:
+
+- 84,604 domains have unchanged boundaries
+- 35,378 domains have adjusted boundaries
+- 129 domains have failed because they have no residues above pLDDT 70
+
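+A possible sketch for the delta-length TODO above (assuming AF domain IDs end in a chopping such as `AF-P12345-F1-model_v4/12-345`, with multiple segments joined by `_`; the parsing needs adjusting if the real ID format differs):
+
+```
+import csv
+
+def chopping_length(domain_id: str) -> int:
+    """Number of residues covered by the chopping part of an AF domain ID."""
+    chopping = domain_id.rsplit("/", 1)[1]
+    return sum(int(end) - int(start) + 1
+               for start, end in (seg.split("-") for seg in chopping.split("_")))
+
+with open("af_100k_domain_mapping_post_tailchop.tsv") as fh:
+    for orig_id, chopped_id in csv.reader(fh, delimiter="\t"):
+        delta_pct = 100 * (chopping_length(orig_id) - chopping_length(chopped_id)) / chopping_length(orig_id)
+        print(f"{orig_id}\t{delta_pct:.1f}")
+```
+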
+## Chop CIF files after boundaries optimisation
+
+Time: 1 hr 46 mins on 12 threads
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt af_100k_domainlist -a 5 -d
+find af_100k_domainlist00* | xargs -P12 -I XXX sh -c 'cath-af-cli chop-cif --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --cif_out_dir chopped_cif_after_optimisation/'
+```
+
+TODO: add a StatusLog to ALL modules.
+
+Chopped 120,112 domains with optimised boundaries.
+
+## Calculate pLDDT and LUR report
+
+Time: 59 mins on 16 threads
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt split_af_100k_plddt_ -a 5 -d
+find split_af_100k_plddt_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-cif-to-plddt-summary --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --plddt_stats_file af_100k_plddt_summary_XXX.tsv'
+cat af_100k_plddt_summary_split_af_100k_plddt_*.tsv > af_100k_plddt_summary.tsv
+```
+
+## Create DSSP Files
+
+Time: 4 hrs
+
+Requirements: the boost library; on the CS HPC it can be loaded as a module.
+
+```
+module load boost/1.71.0
+mkdir af_dssp_dir
+find split_af_100k_dssp_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-cif-to-dssp --cif_in_dir af_cif_raw/ \
+    --id_file XXX \
+    --dssp_out_dir af_dssp_dir/'
+```
+
+Comments: implement a skip if the output file is already present (see the sketch below); maybe switch to IUPRED or some other sequence-based secondary structure predictor.
+
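+A minimal sketch of the skip-if-present idea (assuming each input ID maps to one `<id>.dssp` file in `af_dssp_dir`; adjust the extension if the module names its outputs differently):
+
+```
+# rebuild the work list from the ids that do not yet have a DSSP file
+cat split_af_100k_dssp_000* | grep -v 'af_chain_id' | while read -r id; do
+    [ -f "af_dssp_dir/$id.dssp" ] || echo "$id"
+done > dssp_todo.txt
+```
+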
+## Convert DSSP to SSE summary
+
+Time: 4 mins
+
+Command:
+
+```
+find split_af_100k_dssp_000* | xargs -P16 -I XXX sh -c 'cath-af-cli convert-dssp-to-sse-summary --dssp_dir af_dssp_dir/ \
+    --id_file XXX \
+    --sse_out_file af_100k_sse_XXX.tsv'
+```
+
+## Convert optimised domains to Foldseek DB
+
+Time: 2 hrs 7 mins on a single thread, RAM: 1 GB
+
+Command:
+
+```
+cath-af-cli convert-cif-to-foldseek-db --cif_dir chopped_cif_after_optimisation/ \
+    --fs_querydb_dir af_foldseek_db/ \
+    --fs_bin_path /SAN/biosciences/alphafold/foldseek/bin/foldseek
+```
+
+Comments:
+
+- Added an option to generate a database from only a few structures; it needs to be specified as a CLI option using id_list
+- Investigate why m_core on CS is not seen by Foldseek; increasing the available RAM speeds up the process significantly
+
+## Run Foldseek of Query_Domain_Structures against CATH_S95 Representatives
+
+Time: 2 hrs 30 mins
+
+Command:
+
+```
+cath-af-cli run-foldseek --fs_querydb af_foldseek_db/af_query_foldseek.db \
+    --fs_targetdb /SAN/cath/cath_v4_3_0/databases/foldseek/cath_s95/cath_s95_db \
+    --fs_bin_path /SAN/biosciences/alphafold/foldseek/bin/foldseek
+```
+
+## Convert Foldseek Results to Summary
+
+Time: 5 mins
+
+Command:
+
+```
+cath-af-cli convert-foldseek-output-to-summary --id_file af_100k_domainlist_post_tailchop.txt \
+    --fs_input_file fs_query_results.m8 \
+    --fs_results fs_query_results.txt
+```
+
+Starting from 110,250 unique domain IDs in the Foldseek output, 110,218 hits pass the thresholds.
+
+## Globularity prediction
+
+Time: 2 hrs 6 mins
+
+Split into 16 processes, processing 120,113 domains.
+
+Command:
+
+```
+split -l 7500 af_100k_domainlist_post_tailchop.txt split_af_100k_globularity_ -a 5 -d
+find split_af_100k_globularity_000* | xargs -P16 -I XXX sh -c 'cath-af-cli measure-globularity --af_domain_list XXX \
+    --af_chain_mmcif_dir af_cif_raw/ \
+    --af_domain_globularity af_100k_globularity_XXX.txt'
+cat af_100k_globularity_split_af_100k_globularity_000* > af_100k_globularity.txt
+rm split_af_100k_globularity_000*
+rm af_100k_globularity_split_af_100k_globularity_000*
+```
+
+As this was done in chunks, remove the 16 additional occurrences of the header from `af_100k_globularity.txt`.
+
+## Combine results
+
+TBD - one possible approach is sketched below.
+
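+A hedged sketch (assumptions: pandas is available; the per-chunk SSE files have been concatenated into `af_100k_sse.tsv`; all three summaries are tab-separated and share an `af_domain_id` column - check these against the real headers before use):
+
+```
+import pandas as pd
+
+# load the per-step summaries produced above
+plddt = pd.read_csv("af_100k_plddt_summary.tsv", sep="\t")
+sse = pd.read_csv("af_100k_sse.tsv", sep="\t")
+globularity = pd.read_csv("af_100k_globularity.txt", sep="\t")
+
+# outer-join on the shared domain id so domains missing from one step are kept
+combined = (plddt.merge(sse, on="af_domain_id", how="outer")
+                 .merge(globularity, on="af_domain_id", how="outer"))
+combined.to_csv("af_100k_combined_summary.tsv", sep="\t", index=False)
+```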
"--af_domain_globularity", + type=click.File("wt"), + required=True, + help="Output: CSV file for AF2 domain list including both globularity parameters", +) +@click.option( + "--distance_cutoff", + type=int, + required=False, + default=DEFAULT_GLOB_DISTANCE, + help=f"Distance cutoff for the packing density in Angstrom. (default: {DEFAULT_GLOB_DISTANCE})", +) +@click.option( + "--volume_resolution", + type=int, + required=False, + default=DEFAULT_GLOB_VOLUME, + help=f"The voxel resolution for approximating the protein volume. (default: {DEFAULT_GLOB_VOLUME})", +) +def measure_globularity( + af_domain_list, + af_chain_mmcif_dir, + af_domain_globularity, + distance_cutoff, + volume_resolution, + gzipped_af_chains, +): + "Checks the globularity of the AF domain" + + af_domain_list_reader = get_af_domain_id_reader(af_domain_list) + + af_globularity_writer = get_csv_dictwriter( + af_domain_globularity, + fieldnames=["af_domain_id", "packing_density", "normed_radius_gyration"], + ) + af_globularity_writer.writeheader() + + click.echo( + f"Checking globularity for AF domain" + f"(mmcif_dir={af_chain_mmcif_dir}, in_file={af_domain_list.name}, " + f"out_file={af_domain_globularity.name} ) ..." + ) + + for af_domain_id in af_domain_list_reader: + LOG.debug(f"Working on: {af_domain_id} ...") + + af_domain_packing_density = calculate_packing_density( + af_domain_id, af_chain_mmcif_dir, distance_cutoff,gzipped_af_chains + ) + + af_domain_normed_radius_gyration = calculate_normed_radius_of_gyration( + af_domain_id, af_chain_mmcif_dir,volume_resolution,gzipped_af_chains + ) + LOG.debug(f"Processed entry: {af_domain_id}\tpacking_density:{af_domain_packing_density}\tgiration:{af_domain_normed_radius_gyration}") + af_globularity_writer.writerow( + { + "af_domain_id": af_domain_id, + "packing_density": af_domain_packing_density, + "normed_radius_gyration": af_domain_normed_radius_gyration, + } + ) + + click.echo("DONE") + + +def calculate_packing_density( + af_domain_id: AFDomainID, + af_chain_mmcif_dir: Path, + distance_cutoff: int, + gzipped_af_chains, + *, + cif_filename=None, +) -> AFDomainID: + + chopping = af_domain_id.chopping + segments = chopping.segments + domain_residues = [] + for segment in segments: + for res in range(segment.start,segment.end): + domain_residues.append(res) + + + # create default filename + if cif_filename is None: + if gzipped_af_chains == False: + open_func = open + cif_filename = af_domain_id.af_chain_id + ".cif" + else: + open_func = gzip.open + cif_filename = af_domain_id.af_chain_id + ".cif.gz" + + cif_path = Path(af_chain_mmcif_dir, cif_filename) + + with open_func(f"{cif_path}", mode="rt") as cif_fh: + + structure = MMCIFParser(QUIET=1).get_structure("struc", cif_fh) + + # List of hydrophobic residues + hydrophobic_list=['GLY','ALA','LEU','ILE','VAL','PRO','PHE','MET','TRP'] + + neighbor_list_protein = [] + # Get the number of nearby residues for all hydrophobic residues in the domain + for residue_number in domain_residues: + residue = structure[0]["A"][residue_number] + if residue.get_resname() in hydrophobic_list and not residue.is_disordered(): + center_atoms = Selection.unfold_entities(residue, 'A') + atom_list = [atom for atom in structure.get_atoms()] + ns = NeighborSearch(atom_list) + + nearby_residues = {residue for center_atom in center_atoms + for residue in ns.search(center_atom.coord, distance_cutoff, 'R')} + neighbor_list_protein.append(len(nearby_residues)) + + return round(np.mean(neighbor_list_protein),3) + + +# Function to get the approximated Volume 
+
+
+# Function to get the approximated volume of the domain
+def get_volume(structure, domain_residues, volume_resolution):
+
+    reduced_coords = list()
+    for residue_number in domain_residues:
+        residue = structure[0]["A"][residue_number]
+        for atm in residue.get_atoms():
+            # snap each atom onto a voxel grid of the given resolution
+            x_reduced = int(atm.coord.tolist()[0] / volume_resolution)
+            y_reduced = int(atm.coord.tolist()[1] / volume_resolution)
+            z_reduced = int(atm.coord.tolist()[2] / volume_resolution)
+            reduced_coords.append([x_reduced, y_reduced, z_reduced])
+
+    # the volume is the number of occupied voxels times the voxel volume
+    unique_coords = [list(x) for x in set(tuple(x) for x in reduced_coords)]
+    volume = len(unique_coords) * volume_resolution**3
+
+    return volume
+
+
+# Function to get the normed radius of gyration
+def calculate_normed_radius_of_gyration(
+    af_domain_id: AFDomainID,
+    af_chain_mmcif_dir: Path,
+    volume_resolution: int,
+    gzipped_af_chains: bool,
+    *,
+    cif_filename=None,
+) -> float:
+
+    chopping = af_domain_id.chopping
+    segments = chopping.segments
+    domain_residues = []
+    for segment in segments:
+        # segment boundaries are inclusive, so keep the final residue
+        for res in range(segment.start, segment.end + 1):
+            domain_residues.append(res)
+
+    if cif_filename is None:
+        if gzipped_af_chains:
+            open_func = gzip.open
+            cif_filename = af_domain_id.af_chain_id + ".cif.gz"
+        else:
+            open_func = open
+            cif_filename = af_domain_id.af_chain_id + ".cif"
+
+    cif_path = Path(af_chain_mmcif_dir, cif_filename)
+
+    with open_func(cif_path, mode="rt") as cif_fh:
+        structure = MMCIFParser(QUIET=1).get_structure("struc", cif_fh)
+
+    coords = []
+    masses = []
+    # Fill the lists of coords and masses for all atoms in the domain
+    for residue_number in domain_residues:
+        residue = structure[0]["A"][residue_number]
+        for atom in residue:
+            coords.append(atom.coord.tolist())
+            if atom.element == "C":
+                masses.append(12.0107)
+            elif atom.element == "H":
+                # hydrogens are not considered for the radius of gyration
+                masses.append(0.0)
+            elif atom.element == "O":
+                masses.append(15.9994)
+            elif atom.element == "N":
+                masses.append(14.0067)
+            elif atom.element == "S":
+                masses.append(32.065)
+            else:
+                raise Exception(f"Protein contains unknown atom type {atom.element}")
+
+    # Calculate the radius of gyration: Rg^2 = sum(m * |r|^2) / M - |centre of mass|^2
+    mass_coords = [(m * i, m * j, m * k) for (i, j, k), m in zip(coords, masses)]
+    total_mass = sum(masses)
+    r_tmp = sum(
+        mi * i + mj * j + mk * k for (i, j, k), (mi, mj, mk) in zip(coords, mass_coords)
+    )
+    m_tmp = sum((sum(i) / total_mass) ** 2 for i in zip(*mass_coords))
+    radius_gyration = np.sqrt(r_tmp / total_mass - m_tmp)
+
+    # Radius of gyration for a sphere with the same volume as the protein,
+    # with R = (3 * V / (4 * pi)) ** (1/3)
+    volume = get_volume(structure, domain_residues, volume_resolution)
+    radius = np.cbrt(3 * volume / (4 * np.pi))
+    radius_gyration_sphere = np.sqrt(3 / 5 * radius**2)
+
+    # Return normed radius of gyration
+    return round(radius_gyration / radius_gyration_sphere, 3)
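+
+
+# Example invocation (a sketch; the flags are the click options defined above):
+#
+#   cath-af-cli measure-globularity \
+#       --af_domain_list af_100k_domainlist_post_tailchop.txt \
+#       --af_chain_mmcif_dir af_cif_raw/ \
+#       --af_domain_globularity af_100k_globularity.txt \
+#       --gzipped_af_chains True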
str = "CA", -): - """ - Returns the local pLDDT score for a given residue - """ - return structure[model_num][chain_id][residue_num][atom_type].get_bfactor() - - def cut_segment( structure, segment_to_cut: Segment, cutoff_plddt_score, cut_start, cut_end ) -> Segment: diff --git a/cath_alphaflow/constants.py b/cath_alphaflow/constants.py index 9f21ec9..564774e 100644 --- a/cath_alphaflow/constants.py +++ b/cath_alphaflow/constants.py @@ -16,3 +16,5 @@ VALID_CIF_SUFFIXES = [".cif", ".cif.gz"] STATUS_LOG_SUCCESS = "SUCCESS" STATUS_LOG_FAIL = "FAIL" +DEFAULT_GLOB_DISTANCE = 5 +DEFAULT_GLOB_VOLUME = 5 diff --git a/cath_alphaflow/seq_utils.py b/cath_alphaflow/seq_utils.py index f8dd479..25aa2ea 100644 --- a/cath_alphaflow/seq_utils.py +++ b/cath_alphaflow/seq_utils.py @@ -59,3 +59,16 @@ def combine_fasta_files(fasta_in_dir, fasta_out_file): if fasta_file.suffix in [".fasta", ".fa"]: for seq_record in SeqIO.parse(fasta_file, "fasta"): SeqIO.write(seq_record, fasta_out_fh, "fasta") + +def get_local_plddt_for_res( + structure, + residue_num: int, + *, + model_num: int = 0, + chain_id: str = "A", + atom_type: str = "CA", +): + """ + Returns the local pLDDT score for a given residue + """ + return structure[model_num][chain_id][residue_num][atom_type].get_bfactor()