Merge branch 'main' into multi-ref-caprieval

haddocking · Dec 9, 2024 · 4d0d1ce · 4d0d1ce
2 parents 91908d0 + d97e9a2
commit 4d0d1ce
Show file tree

Hide file tree

Showing 24 changed files with 658 additions and 172 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -8,4 +8,4 @@ updates:
   - package-ecosystem: "pip" # See documentation for possible values
     directory: "/" # Location of package manifests
     schedule:
-      interval: "weekly"
+      interval: "monthly"
diff --git a/.github/workflows/validate_citation.yml b/.github/workflows/validate_citation.yml
@@ -0,0 +1,22 @@
+on:
+  push:
+    branches: [main]
+    paths:
+      - CITATION.cff
+  pull_request:
+  workflow_dispatch:
+
+name: CITATION.cff
+jobs:
+  Validate-CITATION-cff:
+    runs-on: ubuntu-latest
+    name: Validate CITATION.cff
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Validate CITATION.cff
+        uses: dieghernan/cff-validator@v3
diff --git a/CITATION.cff b/CITATION.cff
@@ -1,13 +1,13 @@
 # YAML 1.2
 # Metadata for citation of this software according to the CFF format (https://citation-file-format.github.io/)
-cff-version: 1.0.3
-message: If you use this software, please cite it using these metadata.
+cff-version: 1.2.0
+message: "If you use this software, please cite it using these metadata."
 # FIXME title as repository name might not be the best name, please make human readable
-title: 'haddocking/haddock3: v3.0.0-beta.5'
+title: 'HADDOCK3 - modular integrative modelling software'
 doi: 10.5281/zenodo.10527751
 # FIXME splitting of full names is error prone, please check if given/family name are correct
 authors:
-- given-names: João
+- given-names: Joao
   family-names: Teixeira
   name-particle: M.C.
   affiliation: Zymvol
@@ -21,8 +21,8 @@ authors:
 - given-names: Alexandre
   family-names: Bonvin
   affiliation: '@UtrechtUniversity'
-- given-names: SarahAlidoost
-  family-names: ''
+- given-names: Sarah
+  family-names: Alidoost
   affiliation: Netherlands eScience Center
 - given-names: Victor
   family-names: Reys
@@ -41,14 +41,10 @@ authors:
   affiliation: Netherlands eScience Center
 - given-names: Barbara
   family-names: Vreede
-  affiliation:
 - given-names: SSchott
-  family-names: ''
-  affiliation:
 - given-names: Regen
   family-names: Tsai
-  affiliation:
-version: 3.0.0-beta.5
+version: "2024.10.0b6"
 date-released: 2024-01-18
 repository-code: https://github.com/haddocking/haddock3
-license: cc-by-4.0
+license: CC-BY-4.0
diff --git a/README.md b/README.md
@@ -23,30 +23,21 @@ HADDOCK, standing for **H**igh **A**mbiguity **D**riven protein-protein **DOCK**
 
 ## Installation
 
+To install the [latest release](https://pypi.org/project/haddock3/) of HADDOCK3 from PyPI:
+
 ```bash
 pip install haddock3
 ```
 
-Execute:
+If you would rather install the latest unreleased version, use instead:
 
 ```bash
-$ haddock3 -h
-usage: haddock3 [-h] [--restart RESTART] [--extend-run EXTEND_RUN] [--setup] [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-v] recipe
-
-positional arguments:
-  recipe                The input recipe file path
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --restart RESTART     Restart the run from a given step. Previous folders from the selected step onwards will be deleted.
-  --extend-run EXTEND_RUN
-                        Start a run from a run directory previously prepared with the `haddock3-copy` CLI. Provide the run directory created with `haddock3-copy` CLI.
-  --setup               Only setup the run, do not execute
-  --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
-  -v, --version         show version
+git clone https://github.com/haddocking/haddock3.git
+cd haddock3
+pip install .
 ```
 
-For the installation of third-party additional software, please check the [INSTALL](docs/INSTALL.md).
+For detailed instructions and installation of third-party software, please check [INSTALL.md](docs/INSTALL.md).
 
 You might also want to check the following utilities:
 
@@ -62,6 +53,25 @@ The most basic usage is:
 haddock3 <configuration-file.toml>
 ```
 
+For help on haddock3 usage:
+
+```bash
+$ haddock3 -h
+usage: haddock3 [-h] [--restart RESTART] [--extend-run EXTEND_RUN] [--setup] [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [-v] recipe
+
+positional arguments:
+  recipe                The input recipe file path
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --restart RESTART     Restart the run from a given step. Previous folders from the selected step onwards will be deleted.
+  --extend-run EXTEND_RUN
+                        Start a run from a run directory previously prepared with the `haddock3-copy` CLI. Provide the run directory created with `haddock3-copy` CLI.
+  --setup               Only setup the run, do not execute
+  --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
+  -v, --version         show version
+```
+
 Check the [EXAMPLES](https://github.com/haddocking/haddock3/blob/main/examples/README.md) page for some more usage examples and the [MANUAL]() (_coming soon!_) for a more detailed explanation of the configuration file.
 
 ## Support

diff --git a/examples/thirdparty/openmm/openmm-implicit-solvent-test.cfg b/examples/thirdparty/openmm/openmm-implicit-solvent-test.cfg
@@ -29,8 +29,8 @@ implicit_solvent = true
 implicit_solvent_model = 'implicit/obc1.xml'
 timestep_ps = 0.002
 constraints = 'HBonds'
-equilibration_timesteps = 2000
-simulation_timesteps = 10000
+equilibration_timesteps = 100
+simulation_timesteps = 200
 
 # ====================================================================
 
diff --git a/examples/thirdparty/openmm/openmm-test.cfg b/examples/thirdparty/openmm/openmm-test.cfg
@@ -29,8 +29,8 @@ forcefield = 'amber14-all.xml'
 explicit_solvent_model = 'amber14/tip3p.xml'
 timestep_ps = 0.002
 constraints = 'HBonds'
-equilibration_timesteps = 2000
-simulation_timesteps = 10000
+equilibration_timesteps = 100
+simulation_timesteps = 200
 
 # ====================================================================
 
diff --git a/examples/thirdparty/openmm/openmm-topoaa-flexref-test.cfg b/examples/thirdparty/openmm/openmm-topoaa-flexref-test.cfg
@@ -30,6 +30,9 @@ hise_1 = 113
 ambig_fname="data/ambig.tbl"
 sampling = 5
 
+[seletop]
+select = 1
+
 [caprieval]
 reference_fname = "data/1nx1_refe.pdb"
 
@@ -38,8 +41,8 @@ forcefield = 'amber14-all.xml'
 explicit_solvent_model = 'amber14/tip3p.xml'
 timestep_ps = 0.002
 constraints = 'HBonds'
-equilibration_timesteps = 200
-simulation_timesteps = 1000
+equilibration_timesteps = 100
+simulation_timesteps = 200
 
 [topoaa]
 
@@ -62,5 +65,6 @@ ssdihed = "alphabeta"
 
 [caprieval]
 reference_fname = "data/1nx1_refe.pdb"
+
 # ====================================================================
 
diff --git a/integration_tests/golden_data/workflow.cfg b/integration_tests/golden_data/workflow.cfg
@@ -0,0 +1,12 @@
+run_dir = "run"
+molecules = [
+    "prot.pdb",
+    "cyclic-peptide.pdb"
+]
+[topoaa]
+[rigidbody]
+sampling=4
+cmrest=True
+[clustfcc]
+min_population=1
+[caprieval]
diff --git a/integration_tests/test_emscoring.py b/integration_tests/test_emscoring.py
@@ -24,6 +24,7 @@ def emscoring_module():
         )
         # lower number of steps for faster testing
         emscoring_module.params["nemsteps"] = 5
+        emscoring_module.params["per_interface_scoring"] = True
         yield emscoring_module
 
 
@@ -76,3 +77,11 @@ def test_emscoring_default(emscoring_module, calc_fnat):
     )
     assert fnat == pytest.approx(0.95, abs=0.1)
 
+    # check the interface scoring
+    expected_interface_csv = Path(emscoring_module.path, "emscoring_A_B.tsv")
+    assert expected_interface_csv.exists(), f"{expected_interface_csv} does not exist"
+    df_perint = pd.read_csv(expected_interface_csv, sep="\t", comment="#")
+    # check that the score is equal to the global score (it's a dimer!)
+    assert df_perint["score"].tolist() == df["score"].tolist()
+
+
diff --git a/integration_tests/test_full_workflow.py b/integration_tests/test_full_workflow.py
@@ -47,3 +47,77 @@ def test_emscoring_workflow(caplog, monkeypatch):
         assert os.path.isdir("analysis/1_emscoring_analysis") is True
         # there should be a report.html inside it
         assert os.path.isfile("analysis/1_emscoring_analysis/report.html") is True
+
+
+def test_interactive_analysis_on_workflow(monkeypatch):
+    """A comprehensive test for the interactive commands and analysis."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # stage the golden-data inputs (both PDBs and the workflow config) in tmpdir
+        files = [
+            "cyclic-peptide.pdb",
+            "prot.pdb",
+            "workflow.cfg",
+        ]
+        for fl in files:
+            shutil.copy(
+                Path(GOLDEN_DATA, fl),
+                Path(tmpdir, fl),
+            )
+
+        monkeypatch.chdir(tmpdir)
+
+        from haddock.clis.cli import main as cli_main
+        cli_main(
+            Path("workflow.cfg"),
+        )
+        # recover the run_dir value by scanning workflow.cfg for its 'run_dir = "..."' line
+        with open("workflow.cfg", "r") as f:
+            for line in f:
+                if "run_dir" in line:
+                    run_dir = line.split("=")[1].strip().strip('"')
+                    break
+        assert os.path.isdir(run_dir) is True
+        assert os.path.isdir(Path(run_dir, "0_topoaa")) is True
+        assert os.path.isdir(Path(run_dir, "1_rigidbody")) is True
+        assert os.path.isdir(Path(run_dir, "2_clustfcc")) is True
+        assert os.path.isdir(Path(run_dir, "3_caprieval")) is True
+        assert os.path.isdir(Path(run_dir, "analysis")) is True
+
+        # re-run clustering interactively on the clustfcc step via haddock3-re
+        clustfcc_dir = f"{run_dir}/2_clustfcc"
+        from haddock.clis.cli_re import maincli
+        # maincli() is called without arguments, so the command line is faked through sys.argv
+        monkeypatch.setattr("sys.argv",
+                            ["haddock3-re", "clustfcc", clustfcc_dir, "-f", "0.7"]
+                            )
+        maincli()
+        assert os.path.isdir(Path(run_dir, "2_clustfcc_interactive")) is True
+        assert Path(run_dir, "2_clustfcc_interactive/clustfcc.tsv").exists() is True
+        assert Path(run_dir, "2_clustfcc_interactive/clustfcc.txt").exists() is True
+
+        # re-run scoring interactively on the caprieval step via haddock3-re
+        capri_dir = f"{run_dir}/3_caprieval"
+        # same trick as above: the command line is faked through sys.argv
+        monkeypatch.setattr("sys.argv",
+                            ["haddock3-re", "score", capri_dir, "-a", "1.0"]
+                            )
+        maincli()
+        assert os.path.isdir(Path(run_dir, "3_caprieval_interactive")) is True
+        assert Path(run_dir, "3_caprieval_interactive/capri_ss.tsv").exists() is True
+
+        # analyse steps 2 and 3 with inter=True; the *_interactive_analysis folders are checked below
+        from haddock.clis.cli_analyse import main as cli_analyse
+        cli_analyse(run_dir,
+                    [2,3],
+                    10,
+                    format=None,
+                    scale=None,
+                    is_cleaned=True,
+                    inter=True)
+        exp_clustfcc_dir = Path(run_dir, "analysis", "2_clustfcc_interactive_analysis")
+        exp_caprieval_dir = Path(run_dir, "analysis", "3_caprieval_interactive_analysis")
+        assert os.path.isdir(exp_clustfcc_dir) is True
+        assert os.path.isdir(exp_caprieval_dir) is True
+        assert Path(exp_clustfcc_dir, "report.html").exists() is True
+        assert Path(exp_caprieval_dir, "report.html").exists() is True
+
diff --git a/integration_tests/test_mdscoring.py b/integration_tests/test_mdscoring.py
@@ -22,6 +22,11 @@ def mdscoring_module():
         mdscoring_module = mdscoringModule(
             order=0, path=Path(tmpdir), initial_params=DEFAULT_MDSCORING_CONFIG
         )
+        # lower number of steps for faster testing
+        mdscoring_module.params["watersteps"] = 200
+        mdscoring_module.params["watercoolsteps"] = 200
+        # enable per interface scoring
+        mdscoring_module.params["per_interface_scoring"] = True
         yield mdscoring_module
 
 
@@ -82,3 +87,10 @@ def test_mdscoring_default(mdscoring_module, calc_fnat):
         native=Path(GOLDEN_DATA, "protglyc_complex_1.pdb"),
     )
     assert fnat == pytest.approx(0.90, abs=0.1)
+
+    # check the interface scoring
+    expected_interface_csv = Path(mdscoring_module.path, "mdscoring_A_B.tsv")
+    assert expected_interface_csv.exists(), f"{expected_interface_csv} does not exist"
+    df_perint = pd.read_csv(expected_interface_csv, sep="\t", comment="#")
+    # check that the score is equal to the global score (it's a dimer!)
+    assert df_perint["score"].tolist() == df["score"].tolist()
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "haddock3"
-version = "2024.10.0b6"
+version = "2024.10.0b7"
 description = "HADDOCK3"
 readme = "README.md"
 authors = [{ name = "BonvinLab", email = "bonvinlab.support@uu.nl" }]
@@ -48,15 +48,15 @@ dependencies = [
 
 [project.optional-dependencies]
 dev = [
-  "coverage==7.6.4",
+  "coverage==7.6.8",
   "pytest==8.3.3",
   "pytest-cov==6.0.0",
-  "hypothesis==6.116.0",
+  "hypothesis==6.122.0",
   "pytest-mock==3.14.0",
-  "fastapi==0.115.4",
-  "httpx==0.27.2",
+  "fastapi==0.115.5",
+  "httpx==0.28.0",
   "mpi4py==4.0.1",
-  "kaleido==0.2.1",
+  "kaleido==0.4.1",
   "pytest-random-order==1.1.1",
 
 ]

diff --git a/src/haddock/clis/cli_analyse.py b/src/haddock/clis/cli_analyse.py
@@ -268,7 +268,7 @@ def run_capri_analysis(
     io.load(filename)
     # unpack the files if they are compressed
     if is_cleaned:
-        path_to_unpack = io.output[0].path
+        path_to_unpack = io.output[0].rel_path.parent
         haddock3_unpack(path_to_unpack, ncores=ncores)
     # define step_order. We add one to it, as the caprieval module will
     # interpret itself as being after the selected step