Multistep search #399

Status: Open. mschwoer wants to merge 42 commits into base branch development (from outline_for_multistep_search).

Changes from all commits (42):
b1f7442  orchestrate multiple steps in cli.py  (mschwoer, Dec 10, 2024)
68e0c09  add "meta" config  (mschwoer, Dec 10, 2024)
996b164  allow to transfer values between runs  (mschwoer, Dec 10, 2024)
e2d6c9a  create log folder if it does not exist  (mschwoer, Dec 10, 2024)
d01b054  update design  (mschwoer, Dec 11, 2024)
2608a47  small improvements  (mschwoer, Dec 11, 2024)
2eba74a  adapt frontend  (mschwoer, Dec 11, 2024)
6c10667  tweaks around logging  (mschwoer, Dec 11, 2024)
c5c2b87  remove passing of step_name  (mschwoer, Dec 11, 2024)
adc94e1  refactoring  (mschwoer, Dec 11, 2024)
40e5278  add multistep to e2e tests  (mschwoer, Dec 11, 2024)
92e9886  minor refactorings  (mschwoer, Dec 11, 2024)
82fdd79  minor refactorings  (mschwoer, Dec 11, 2024)
efb4794  add tests  (mschwoer, Dec 11, 2024)
04ea0c8  add tests  (mschwoer, Dec 11, 2024)
849fbaa  fix tests  (mschwoer, Dec 11, 2024)
a2771a1  fix tests  (mschwoer, Dec 11, 2024)
2561cd4  some fixes and added TODOs  (mschwoer, Dec 11, 2024)
3f51c30  add type hints  (mschwoer, Dec 11, 2024)
1ed6f83  use correct values to pass to mbr step  (mschwoer, Dec 11, 2024)
1064fa8  take dynamic config from whatever step is first  (mschwoer, Dec 11, 2024)
788c706  make tests a bit easier to get  (mschwoer, Dec 11, 2024)
f9f2911  use correct speclib  (mschwoer, Dec 12, 2024)
a767d7d  fix some business errors  (mschwoer, Dec 12, 2024)
0507fb5  change basic_multistep test data  (mschwoer, Dec 12, 2024)
b2093f8  fix multistep config  (mschwoer, Dec 12, 2024)
8ab017b  make tests easier to debug  (mschwoer, Dec 12, 2024)
b600501  make tests easier to debug  (mschwoer, Dec 12, 2024)
f31676a  improve tests  (mschwoer, Dec 12, 2024)
d6ba82c  switch to reading optmization params from file  (mschwoer, Dec 12, 2024)
6cba262  remove extracting data directly from workflow again  (mschwoer, Dec 12, 2024)
99c5771  remove setting quant_dir  (mschwoer, Dec 12, 2024)
252da3a  rename config in tests  (mschwoer, Dec 12, 2024)
9a3c309  fix e2e tests  (mschwoer, Dec 12, 2024)
3ec80d6  fix extraction of values  (mschwoer, Dec 13, 2024)
feaf857  rename basic_multistep e2e test  (mschwoer, Dec 13, 2024)
57efd0d  refactor how config is being merged  (mschwoer, Dec 13, 2024)
ddab4a2  add docs on multistep search  (mschwoer, Dec 13, 2024)
3c39f15  revert some changes  (mschwoer, Dec 13, 2024)
b2be519  Merge branch 'development' into outline_for_multistep_search  (mschwoer, Dec 20, 2024)
b7572b2  install mono in CI  (mschwoer, Dec 20, 2024)
a55a9e1  fix formatting  (mschwoer, Dec 20, 2024)
5 changes: 5 additions & 0 deletions .github/workflows/_run_tests.yml
@@ -34,6 +34,11 @@ jobs:
       - name: Conda info
         shell: bash -l {0}
         run: conda info
+      - name: Install mono
+        shell: bash -l {0}
+        run: |
+          conda install mono
+
       - name: Perform pip installation with all stable dependencies
         shell: bash -l {0}
         run: |
7 changes: 6 additions & 1 deletion .github/workflows/e2e_testing.yml
@@ -13,9 +13,10 @@ jobs:
     runs-on: self-hosted
     if: contains(github.event.pull_request.labels.*.name, 'test:e2e') || github.event_name == 'push' || github.event_name == 'workflow_dispatch'
     strategy:
+      fail-fast: false
       matrix:
         # test case name as defined in e2e_test_cases.yaml
-        test_case: [ "basic", "synchropasef", "astral", "astral_automatic_calibration", ]
+        test_case: [ "basic", "synchropasef", "astral", "astral_automatic_calibration", "multistep"]
     env:
       RUN_NAME: alphadia-${{github.sha}}-${{github.run_id}}-${{github.run_attempt}}
       BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -48,6 +49,10 @@ jobs:
           conda remove -n $RUN_NAME --all -y
       - name: Delete Caches on Error
         if: ${{ failure() && steps.pip_installation.conclusion == 'failure' }}
+        shell: bash -el {0}
+        run: |
+          rm -rf ~/.cache/pip
+          rm -rf ~/.cache/conda
 
           # Exit with error code to fail the job
           exit 1
19 changes: 8 additions & 11 deletions alphadia/cli.py
@@ -11,11 +11,13 @@
 import re
 import sys
 
+import matplotlib
 import yaml
 
 import alphadia
 from alphadia import utils
 from alphadia.exceptions import CustomError
+from alphadia.search_plan import SearchPlan
 from alphadia.workflow import reporting
 
 logger = logging.getLogger()
@@ -205,6 +207,7 @@ def run(*args, **kwargs):
 
     raw_path_list = _get_raw_path_list_from_args_and_config(args, user_config)
     logger.progress(f"Searching {len(raw_path_list)} files:")
+
     for f in raw_path_list:
         logger.progress(f"  {os.path.basename(f)}")
 
@@ -225,24 +228,18 @@ def run(*args, **kwargs):
     if quant_dir is not None:
         logger.progress(f"Saving quantification output to {quant_dir=}")
 
-    try:
-        import matplotlib
-
-        # important to suppress matplotlib output
-        matplotlib.use("Agg")
+    # important to suppress matplotlib output
+    matplotlib.use("Agg")
 
-        from alphadia.planning import Plan
-
-        plan = Plan(
+    try:
+        SearchPlan(
             output_directory,
             raw_path_list=raw_path_list,
             library_path=library_path,
             fasta_path_list=fasta_path_list,
             config=user_config,
             quant_path=quant_dir,
-        )
-
-        plan.run()
+        ).run_plan()
 
     except Exception as e:
         if isinstance(e, CustomError):
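
For orientation, the whole CLI change boils down to a single call into the new orchestration layer. Below is a minimal sketch of driving that entry point programmatically, mirroring the call in cli.py above; all paths and config values are hypothetical placeholders, not part of this PR:

```python
# Minimal sketch of invoking the new SearchPlan entry point, mirroring cli.py.
# All paths and config values below are hypothetical placeholders.
import matplotlib

from alphadia.search_plan import SearchPlan

# suppress interactive matplotlib output, as cli.py does
matplotlib.use("Agg")

SearchPlan(
    "/path/to/output",
    raw_path_list=["/path/to/run_01.raw", "/path/to/run_02.raw"],
    library_path="/path/to/speclib.hdf",
    fasta_path_list=[],
    config={"general": {"thread_count": 8}},  # user-level config overrides
    quant_path=None,  # None: quant files are written to the workflow folder
).run_plan()
```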
5 changes: 5 additions & 0 deletions alphadia/constants/default.yaml
@@ -423,3 +423,8 @@ calibration_manager:
       - mobility_observed
       output_columns:
       - mobility_calibrated
+
+# scope of default yaml should be one search step
+multistep_search:
+  transfer_step_enabled: False
+  mbr_step_enabled: False
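
These two new flags are the user-facing switch into the multistep flow; both default to False, so a plain run remains a single search step. A sketch of the shape of an opted-in user config, parsed with PyYAML purely for illustration (the loading code around the keys is not part of this PR):

```python
# Sketch: a user config that opts into both extra search steps.
# Only the multistep_search keys come from default.yaml; the rest is illustrative.
import yaml

user_yaml = """
multistep_search:
  transfer_step_enabled: True
  mbr_step_enabled: True
"""

user_config = yaml.safe_load(user_yaml)
assert user_config["multistep_search"]["transfer_step_enabled"] is True
```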
47 changes: 47 additions & 0 deletions alphadia/constants/multistep.yaml
@@ -0,0 +1,47 @@
+# configuration for multistep search
+# for each of the three steps, the configuration values defined here override the default values and the values defined by the user
+
+# future : default.yaml -> user.yaml -> multistep_user.yaml -> multistep.yaml
+
+transfer:
+  # library_prediction:  # should be done in 99% of cases
+  #   predict: True
+  transfer_library:
+    enabled: True
+  transfer_learning:
+    enabled: True
+
+  # override settings that could have been set by the user:
+  general:
+    save_library: False
+    reuse_quant: False
+  # TODO: think about enforcing optimization of rt here
+
+library:
+  # the step following TL needs to have this. It will be forced to true only (by code) if transfer step was done before
+  # library_prediction:
+  #   predict: True
+
+  # override settings that could have been set by the user:
+  general:
+    save_library: False
+    reuse_quant: False
+  transfer_library:
+    enabled: False
+  transfer_learning:
+    enabled: False
+
+mbr:
+  fdr:
+    inference_strategy: library
+  search:
+    target_num_candidates: 5
+  # override settings that could have been set by the user:
+  general:
+    reuse_quant: False
+  library_prediction:
+    predict: False
+  transfer_library:
+    enabled: False
+  transfer_learning:
+    enabled: False
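
The header comment sketches the intended precedence chain: default.yaml first, then the user config, then the per-step section above. A minimal sketch of that layered override using a plain recursive dict merge; deep_merge is an illustrative stand-in, not the Config.update() implementation from alphadia.workflow.config:

```python
# Illustrative stand-in for the layered config override:
# default.yaml -> user config -> per-step multistep.yaml section.
def deep_merge(base: dict, override: dict) -> dict:
    """Return a copy of `base` with `override` applied recursively."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged

default_cfg = {"general": {"save_library": True, "reuse_quant": True}}
user_cfg = {"general": {"save_library": True}}       # user-level choice
step_cfg = {"general": {"save_library": False,       # per-step overrides win
                        "reuse_quant": False}}

effective = deep_merge(deep_merge(default_cfg, user_cfg), step_cfg)
print(effective)  # {'general': {'save_library': False, 'reuse_quant': False}}
```

Note that the step-level section has the last word: it deliberately overrides settings the user may have set, e.g. forcing save_library off during the transfer step.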
18 changes: 12 additions & 6 deletions alphadia/outputtransform.py
@@ -26,8 +26,15 @@
 )
 from alphadia.transferlearning.train import FinetuneManager
 from alphadia.workflow import manager, peptidecentric
+from alphadia.workflow.config import Config
 from alphadia.workflow.managers.raw_file_manager import RawFileManager
 
+# TODO move to a class with the rest of the constants
+MS1_ERROR = "ms1_error"
+MS2_ERROR = "ms2_error"
+
+OPTIMIZATION_PREFIX = "optimization."
+
 logger = logging.getLogger()
 
 
@@ -306,7 +313,7 @@ class SearchPlanOutput:
     TRANSFER_MODEL = "peptdeep.transfer"
     TRANSFER_STATS_OUTPUT = "stats.transfer"
 
-    def __init__(self, config: dict, output_folder: str):
+    def __init__(self, config: Config, output_folder: str):
         """Combine individual searches into and build combined outputs
 
         In alphaDIA the search plan orchestrates the library building preparation,
@@ -974,16 +981,15 @@ def _build_run_stat_df(
             optimization_manager = manager.OptimizationManager(
                 path=optimization_manager_path
             )
-            optimization_stats["ms2_error"] = optimization_manager.ms2_error
-            optimization_stats["ms1_error"] = optimization_manager.ms1_error
+            optimization_stats[MS2_ERROR] = optimization_manager.ms2_error
+            optimization_stats[MS1_ERROR] = optimization_manager.ms1_error
             optimization_stats["rt_error"] = optimization_manager.rt_error
             optimization_stats["mobility_error"] = optimization_manager.mobility_error
         else:
             logger.warning(f"Error reading optimization manager for {raw_name}")
 
-        prefix = "optimization."
-        for key in ["ms2_error", "ms1_error", "rt_error", "mobility_error"]:
-            stats[f"{prefix}{key}"] = optimization_stats[key]
+        for key in [MS2_ERROR, MS1_ERROR, "rt_error", "mobility_error"]:
+            stats[f"{OPTIMIZATION_PREFIX}{key}"] = optimization_stats[key]
 
         # collect calibration stats
         calibration_stats = defaultdict(lambda: np.nan)
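
To make the prefixing concrete, a tiny sketch of the key layout the loop above produces (the numeric values are invented):

```python
# Sketch: stat keys produced by the prefixing loop above (values invented).
OPTIMIZATION_PREFIX = "optimization."
optimization_stats = {"ms2_error": 12.5, "ms1_error": 5.0,
                      "rt_error": 30.0, "mobility_error": 0.01}

stats = {}
for key in ["ms2_error", "ms1_error", "rt_error", "mobility_error"]:
    stats[f"{OPTIMIZATION_PREFIX}{key}"] = optimization_stats[key]

print(stats)
# {'optimization.ms2_error': 12.5, 'optimization.ms1_error': 5.0,
#  'optimization.rt_error': 30.0, 'optimization.mobility_error': 0.01}
```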
67 changes: 41 additions & 26 deletions alphadia/planning.py
@@ -28,12 +28,14 @@
 from alphadia.exceptions import CustomError
 from alphadia.workflow import peptidecentric, reporting
 from alphadia.workflow.base import WorkflowBase
-from alphadia.workflow.config import Config
+from alphadia.workflow.config import MULTISTEP_SEARCH, USER_DEFINED, Config
+
+SPECLIB_FILE_NAME = "speclib.hdf"
 
 logger = logging.getLogger()
 
 
-class Plan:
+class Plan:  # TODO rename -> SearchStep, planning.py -> search_step.py
     def __init__(
         self,
         output_folder: str,
@@ -42,6 +44,7 @@ def __init__(
         fasta_path_list: list[str] | None = None,
         config: dict | Config | None = None,
         config_base_path: str | None = None,
+        extra_config: dict | None = None,
         quant_path: str | None = None,
     ) -> None:
         """Highest level class to plan a DIA Search.
@@ -64,15 +67,19 @@ def __init__(
             list of fasta file locations to build the library from
 
         config_base_path : str, optional
-            yaml file containing the default config.
+            user-provided yaml file containing the default config.
 
         config : dict, optional
-            dict to update the default config. Can be used for debugging purposes etc.
+            user-provided dict to update the default config. Can be used for debugging purposes etc.
+
+        extra_config : dict, optional
+            dict to update the final config. Used for multistep searches.
 
         quant_path : str, optional
            path to directory to save the quantification results (psm & frag parquet files). If not provided, the results are saved in the usual workflow folder
 
         """
+
        if config is None:
            config = {}
        if fasta_path_list is None:
@@ -81,32 +88,29 @@ def __init__(
            raw_path_list = []

        self.output_folder = output_folder
+        os.makedirs(output_folder, exist_ok=True)
+        reporting.init_logging(self.output_folder)
+
        self.raw_path_list = raw_path_list
        self.library_path = library_path
        self.fasta_path_list = fasta_path_list
        self.quant_path = quant_path

        self.spectral_library = None

-        # needs to be done before any logging:
-        reporting.init_logging(self.output_folder)
-
-        self._print_logo()
-
-        self._print_environment()
-
-        self._config = self._init_config(config, output_folder, config_base_path)
+        self._config = self._init_config(
+            config, extra_config, output_folder, config_base_path
+        )

-        level_to_set = self._config["general"]["log_level"]
-        level_code = logging.getLevelName(level_to_set)
-        logger.setLevel(level_code)
+        logger.setLevel(logging.getLevelName(self._config["general"]["log_level"]))

        self.init_alphabase()
        self.load_library()

        torch.set_num_threads(self._config["general"]["thread_count"])

-    def _print_logo(self) -> None:
+    @staticmethod
+    def print_logo() -> None:  # TODO move elsewhere
        """Print the alphadia logo and version."""
        logger.progress(" _ _ ___ ___ _ ")
        logger.progress(r" __ _| |_ __| |_ __ _| \_ _| /_\ ")
@@ -119,6 +123,7 @@ def _print_logo(self) -> None:
    def _init_config(
        self,
        user_config: dict | Config,
+        extra_config: dict,
        output_folder: str,
        config_base_path: str | None,
    ) -> Config:
@@ -131,20 +136,29 @@ def _init_config(
                os.path.dirname(__file__), "constants", "default.yaml"
            )

-        logger.info(f"loading default config from {config_base_path}")
+        logger.info(f"loading config from {config_base_path}")
        config = Config()
        config.from_yaml(config_base_path)

+        config_updates = []
        # load update config from dict
        if isinstance(user_config, dict):
-            update_config = Config("user defined")
-            update_config.from_dict(user_config)
+            user_config_update = Config(USER_DEFINED)
+            user_config_update.from_dict(user_config)
+            config_updates.append(user_config_update)
        elif isinstance(user_config, Config):
-            update_config = user_config
+            config_updates.append(user_config)
        else:
            raise ValueError("'config' parameter must be of type 'dict' or 'Config'")

-        config.update([update_config], print_modifications=True)
+        if extra_config is not None:
+            extra_config_update = Config(MULTISTEP_SEARCH)
+            extra_config_update.from_dict(extra_config)
+            # need to overwrite user-defined output folder here
+            extra_config["output"] = output_folder
+            config_updates.append(extra_config_update)
+
+        config.update(config_updates, print_modifications=True)

        if "output" not in config:
            config["output"] = output_folder
@@ -169,7 +183,8 @@ def spectral_library(self) -> SpecLibFlat:
    def spectral_library(self, spectral_library: SpecLibFlat) -> None:
        self._spectral_library = spectral_library

-    def _print_environment(self) -> None:
+    @staticmethod
+    def print_environment() -> None:  # TODO move elsewhere
        """Log information about the python environment."""

        logger.progress(f"hostname: {socket.gethostname()}")
@@ -285,7 +300,7 @@ def _parse_modifications(mod_str: str) -> list[str]:
        )
        spectral_library = multiplexing(spectral_library)

-        library_path = os.path.join(self.output_folder, "speclib.hdf")
+        library_path = os.path.join(self.output_folder, SPECLIB_FILE_NAME)
        logger.info(f"Saving library to {library_path}")
        spectral_library.save_hdf(library_path)

@@ -344,15 +359,15 @@ def run(
                raise e

            finally:
-                if workflow.reporter:
+                if workflow and workflow.reporter:
                    workflow.reporter.log_string(f"Finished workflow for {raw_name}")
                    workflow.reporter.context.__exit__(None, None, None)
                    del workflow

        try:
            base_spec_lib = SpecLibBase()
            base_spec_lib.load_hdf(
-                os.path.join(self.output_folder, "speclib.hdf"), load_mod_seq=True
+                os.path.join(self.output_folder, SPECLIB_FILE_NAME), load_mod_seq=True
            )

            output = outputtransform.SearchPlanOutput(self.config, self.output_folder)
@@ -415,7 +430,7 @@ def _process_raw_file(
    def _clean(self):
        if not self.config["general"]["save_library"]:
            try:
-                os.remove(os.path.join(self.output_folder, "speclib.hdf"))
+                os.remove(os.path.join(self.output_folder, SPECLIB_FILE_NAME))
            except Exception as e:
                logger.exception(f"Error deleting library: {e}")
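
The new extra_config hook is what lets a multistep orchestrator inject per-step overrides on top of the user config. A hypothetical sketch of one step of such an orchestration follows; the override dict mirrors the transfer section of multistep.yaml, while the paths and the orchestration itself are illustrative:

```python
# Hypothetical sketch: driving one search step with per-step overrides via
# extra_config. The override dict mirrors multistep.yaml's transfer section;
# all paths are placeholders.
from alphadia.planning import Plan

transfer_overrides = {
    "transfer_library": {"enabled": True},
    "transfer_learning": {"enabled": True},
    "general": {"save_library": False, "reuse_quant": False},
}

plan = Plan(
    "/path/to/output/transfer",
    raw_path_list=["/path/to/run_01.raw"],
    library_path="/path/to/speclib.hdf",
    config={},                        # user config, applied first
    extra_config=transfer_overrides,  # applied last, wins over the user config
)
plan.run()
```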