apcamargo · UriNeri · Sep 12, 2024 · Sep 14, 2024
diff --git a/genomad/cli.py b/genomad/cli.py
@@ -7,7 +7,7 @@
 from rich.console import Console
 from rich.padding import Padding
 from rich.panel import Panel
-
+# from genomad.modules.lazy_group import LazyGroup
 
 CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
 click.rich_click.USE_RICH_MARKUP = True
@@ -23,6 +23,7 @@
 click.rich_click.COMMAND_GROUPS = {
     "genomad": [
         {
+            # "cls" : LazyGroup,
             "name": "Database download",
             "commands": [
                 "download-database",

diff --git a/genomad/lazy_genomad.py b/genomad/lazy_genomad.py
@@ -0,0 +1,33 @@
+import os
+import rich_click as click
+from importlib import resources
+from genomad.modules.lazy_group import LazyGroup #, help_long 
+
+# Root command group of the lazy CLI. Every subcommand is registered as a
+# dotted import path and is only imported when `LazyGroup` resolves it.
+@click.group(
+    name="Main",
+    cls=LazyGroup,
+    context_settings={
+        "show_default": True,
+        "help_option_names": ["-h", "-H", "--help"],
+    },
+    # Maps {command-name} -> {module-name}.{command-object-name}
+    lazy_subcommands={
+        "download-database": "genomad.modules.download.download_database",
+        "end-to-end": "genomad.modules.endtoend.end_to_end",
+        "annotate": "genomad.modules.annotate.annotate",
+        "find-proviruses": "genomad.modules.find_proviruses.find_proviruses",
+        "marker-classification": "genomad.modules.marker_classification.marker_classification",
+        "nn-classification": "genomad.modules.nn_classification.nn_classification",
+        # The two commands below are defined as click commands in their
+        # modules, so they are registered like every other module command.
+        "aggregated-classification": "genomad.modules.aggregated_classification.aggregated_classification",
+        "score-calibration": "genomad.modules.score_calibration.score_calibration",
+        "summary": "genomad.modules.summary.summary",
+        # NOTE(review): the two entries below stay disabled; no matching
+        # command objects are visible here. Confirm they exist before enabling.
+        #  "plasmid-score": "genomad.modules.plasmid_score.plasmid_score",
+        #  "virus-score": "genomad.modules.virus_score.virus_score",
+        "taxonomy": "genomad.modules.taxonomy.taxonomy",
+        "mini-annotate": "genomad.modules.mini_annotate.mini_annotate",
+        "convert-genbank": "genomad.modules.convert_genbank.convert_genbank",
+        "convert-fasta": "genomad.modules.convert_fasta.convert_fasta",
+        "help": "genomad.modules.lazy_group.help_long",
+    },
+)
+@click.version_option(prog_name="geNomad")
+def genomad():
+    """geNomad: Identification of mobile genetic elements"""
+    pass
+
+# Invoke the root command group when this file is executed as a script.
+if __name__ == "__main__":
+    genomad()
diff --git a/genomad/modules/aggregated_classification.py b/genomad/modules/aggregated_classification.py
@@ -3,7 +3,8 @@
 import numpy as np
 from genomad import sequence, utils
 from genomad._paths import GenomadOutputs
-
+import rich_click as click
+from pathlib import Path
 
 def branch_attention(w: np.array, b1: np.array, b2: np.array, temperature: float = 2):
     w_1 = np.array(
@@ -26,6 +27,13 @@ def branch_attention(w: np.array, b1: np.array, b2: np.array, temperature: float
     output = np.matmul((b1 + b2) / 2, dense_layer_weights) + dense_layer_bias
     return utils.softmax(output, temperature)
 
+# Option names are hyphenated to match the other module commands; the original
+# underscore spellings are kept as aliases so existing invocations still work.
+# Click derives the parameter name from the first long name (`input_path`, ...).
+@click.command()
+@click.option("--input-path", "--input_path", type=click.Path(path_type=Path), help="Path to the input FASTA file.")
+# Required: `main` starts by creating `output_path` if it does not exist.
+@click.option("--output-path", "--output_path", type=click.Path(path_type=Path), required=True, help="Path to the output directory.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+def aggregated_classification(input_path, output_path, restart, verbose):
+    """Run the aggregated-classification module."""
+    # Thin CLI wrapper: all work happens in this module's `main`.
+    main(input_path, output_path, restart, verbose)
 
 def main(input_path, output_path, restart, verbose):
     # Create `output_path` if it does not exist

diff --git a/genomad/modules/annotate.py b/genomad/modules/annotate.py
@@ -1,6 +1,7 @@
 import shutil
 import sys
-
+import rich_click as click
+from pathlib import Path
 from genomad import database, mmseqs2, prodigal, sequence, taxonomy, utils
 from genomad._paths import GenomadOutputs
 
@@ -46,8 +47,20 @@ def write_genes_output(genes_output, database_obj, prodigal_obj, mmseqs2_obj):
                 f"{taxid}\t{taxname}\t{conjscan}\t{amr}\t{accession}\t{description}\n"
             )
 
-
-def main(
+@click.command()
+@click.option("--input-path", type=click.Path(path_type=Path), help="Path to the input file.")
+@click.option("--output-path", type=click.Path(path_type=Path), help="Path to the output directory.")
+@click.option("--database-path", type=click.Path(path_type=Path), help="Path to the database directory.")
+@click.option("--use-minimal-db", is_flag=True, help="Use minimal database.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+@click.option("--threads", type=int, help="Number of threads to use.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+@click.option("--conservative-taxonomy", is_flag=True, help="Use conservative taxonomy.")
+@click.option("--sensitivity", type=str, help="Sensitivity level for MMseqs2.")
+@click.option("--evalue", type=float, help="E-value threshold for MMseqs2.")
+@click.option("--splits", type=int, help="Number of splits for MMseqs2.")
+@click.option("--cleanup", is_flag=True, help="Remove temporary files.")
+def annotate(
     input_path,
     output_path,
     database_path,
@@ -61,6 +74,9 @@ def main(
     splits,
     cleanup,
 ):
+    main(input_path, output_path, database_path, use_minimal_db, restart, threads, verbose, conservative_taxonomy, sensitivity, evalue, splits, cleanup)
+
+def main(input_path, output_path, database_path, use_minimal_db, restart, threads, verbose, conservative_taxonomy, sensitivity, evalue, splits, cleanup):
     # Create `output_path` if it does not exist
     if not output_path.is_dir():
         output_path.mkdir()

diff --git a/genomad/modules/download.py b/genomad/modules/download.py
@@ -3,7 +3,8 @@
 import urllib
 from functools import partial
 from urllib.request import urlopen
-
+import rich_click as click
+from pathlib import Path
 import genomad
 from genomad import utils
 from rich.progress import (
@@ -79,7 +80,13 @@ def download(self):
     def extract(self):
         shutil.unpack_archive(self.output_file, self.destination, "gztar")
 
-
+# Click entry point for downloading the database; a thin wrapper around `main`.
+@click.command()
+@click.option(
+    "--destination",
+    type=click.Path(path_type=Path),
+    help="Path to the directory where the database will be downloaded.",
+)
+@click.option("--keep", is_flag=True, help="Keep the downloaded database file.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+def download_database(destination, keep, verbose):
+    # No docstring on purpose: Click would surface it as the command's help text.
+    main(destination=destination, keep=keep, verbose=verbose)
+
 def main(destination, keep, verbose):
     console = utils.HybridConsole(verbose=verbose)
     database_downloader = DatabaseDownloader(destination, console)
@@ -103,3 +110,5 @@ def main(destination, keep, verbose):
         f"geNomad database (v{database_downloader.version}) is ready to be used!",
         style="yellow",
     )
+
+
diff --git a/genomad/modules/find_proviruses.py b/genomad/modules/find_proviruses.py
@@ -4,6 +4,7 @@
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import List, Optional
+import rich_click as click
 
 import numpy as np
 import pycrfsuite
@@ -351,7 +352,7 @@ def yield_proviruses(
     threshold: float,
     in_edge_threshold: float,
     has_integrase_threshold: float,
-) -> Provirus:
+) -> Provirus: #  what is this? invalid type? (question for Antonio)
     total_count = 0
     count_array, value_array = utils.rle_encode(provirus_labels)
     n_islands = len(count_array)
@@ -388,8 +389,25 @@ def yield_proviruses(
                 )
         total_count += count
 
-
-def main(
+@click.command()
+@click.option("--input-path", type=click.Path(path_type=Path), help="Path to the input file.")
+@click.option("--output-path", type=click.Path(path_type=Path), help="Path to the output directory.")
+@click.option("--database-path", type=click.Path(path_type=Path), help="Path to the database directory.")
+@click.option("--cleanup", is_flag=True, help="Remove temporary files.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+@click.option("--skip-integrase-identification", is_flag=True, help="Skip integrase identification.")
+@click.option("--skip-trna-identification", is_flag=True, help="Skip tRNA identification.")
+@click.option("--crf-threshold", type=float, help="CRF threshold.")
+@click.option("--marker-threshold", type=float, help="Marker threshold.")
+@click.option("--marker-threshold-integrase", type=float, help="Marker threshold for integrases.")
+@click.option("--marker-threshold-edge", type=float, help="Marker threshold for edges.")
+@click.option("--max-integrase-distance", type=int, help="Maximum distance for integrases.")
+@click.option("--max-trna-distance", type=int, help="Maximum distance for tRNAs.")
+@click.option("--sensitivity", type=str, help="Sensitivity level for MMseqs2.")
+@click.option("--evalue", type=float, help="E-value threshold for MMseqs2.")
+@click.option("--threads", type=int, help="Number of threads to use.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+def find_proviruses(
     input_path,
     output_path,
     database_path,
@@ -408,6 +426,9 @@ def main(
     sensitivity,
     evalue,
 ):
+    main(input_path, output_path, database_path, cleanup, restart, skip_integrase_identification, skip_trna_identification, threads, verbose, crf_threshold, marker_threshold, marker_threshold_integrase, marker_threshold_edge, max_integrase_distance, max_trna_distance, sensitivity, evalue)
+
+def main(input_path, output_path, database_path, cleanup, restart, skip_integrase_identification, skip_trna_identification, threads, verbose, crf_threshold, marker_threshold, marker_threshold_integrase, marker_threshold_edge, max_integrase_distance, max_trna_distance, sensitivity, evalue):
     # Create `output_path` if it does not exist
     if not output_path.is_dir():
         output_path.mkdir()

diff --git a/genomad/modules/lazy_group.py b/genomad/modules/lazy_group.py
@@ -0,0 +1,53 @@
+import importlib
+import rich_click as click
+from rich.console import Console
+import pathlib as pt
+console = Console()
+
+# Adopted from https://click.palletsprojects.com/en/8.1.x/complex/#lazily-loading-subcommands
+class LazyGroup(click.RichGroup):
+    """A `RichGroup` whose subcommands are imported only when they are looked up.
+
+    `lazy_subcommands` is a map of the form:
+
+        {command-name} -> {module-name}.{command-object-name}
+    """
+
+    def __init__(self, *args, lazy_subcommands=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Fall back to an empty mapping so the lookups below never see `None`.
+        self.lazy_subcommands = lazy_subcommands or {}
+
+    def list_commands(self, ctx):
+        """Return the eagerly registered command names followed by the sorted lazy names."""
+        eager_names = super().list_commands(ctx)
+        lazy_names = sorted(self.lazy_subcommands)
+        return eager_names + lazy_names
+
+    def get_command(self, ctx, cmd_name):
+        """Return the command for `cmd_name`; lazy entries take precedence over eager ones."""
+        if cmd_name not in self.lazy_subcommands:
+            return super().get_command(ctx, cmd_name)
+        return self._lazy_load(cmd_name)
+
+    def _lazy_load(self, cmd_name):
+        """Import and return the command object registered under `cmd_name`.
+
+        Raises:
+            ValueError: if the imported attribute is not a click command.
+        """
+        # Split "{module-name}.{command-object-name}" on the last dot.
+        import_path = self.lazy_subcommands[cmd_name]
+        modname, cmd_object_name = import_path.rsplit(".", 1)
+        cmd_object = getattr(importlib.import_module(modname), cmd_object_name)
+        # Check the result to make debugging easier. `click.Command` is the
+        # base class of commands and groups; `click.BaseCommand` is deprecated
+        # since Click 8.2, so it is no longer used for this check.
+        if not isinstance(cmd_object, click.Command):
+            raise ValueError(
+                f"Lazy loading of {import_path} failed by returning "
+                "a non-command object"
+            )
+        return cmd_object
+
+
+# Placeholder `help` command: prints a fixed message on the module-level console.
+@click.command()
+def help_long(**kwargs):
+    """
+    genomad does things
+    """
+    message = "genomad does things"
+    console.print(message)
+
+
+# Running this module directly invokes the placeholder `help_long` command.
+if __name__ == "__main__":
+    help_long()
diff --git a/genomad/modules/marker_classification.py b/genomad/modules/marker_classification.py
@@ -4,7 +4,7 @@
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import List
-
+import rich_click as click
 import numpy as np
 import xgboost as xgb
 from genomad import database, sequence, utils
@@ -334,7 +334,16 @@ def get_feature_array(
         np.array(marker_enrichment_array),
     )
 
-
+@click.command()
+@click.option("--input-path", type=click.Path(path_type=Path), help="Path to the input file.")
+# Required: `main` calls `output_path.is_dir()` right away, so a missing value
+# would otherwise surface as an AttributeError on None instead of a usage error.
+@click.option("--output-path", type=click.Path(path_type=Path), required=True, help="Path to the output directory.")
+@click.option("--database-path", type=click.Path(path_type=Path), help="Path to the database directory.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+@click.option("--threads", type=int, help="Number of threads to use.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+def marker_classification(input_path, output_path, database_path, restart, threads, verbose):
+    """Run the marker-classification module."""
+    # Thin CLI wrapper: all work happens in this module's `main`.
+    main(
+        input_path=input_path,
+        output_path=output_path,
+        database_path=database_path,
+        restart=restart,
+        threads=threads,
+        verbose=verbose,
+    )
+
+
 def main(input_path, output_path, database_path, restart, threads, verbose):
     # Create `output_path` if it does not exist
     if not output_path.is_dir():

diff --git a/genomad/modules/nn_classification.py b/genomad/modules/nn_classification.py
@@ -2,7 +2,7 @@
 import shutil
 import sys
 from pathlib import Path
-
+import rich_click as click
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
 os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
@@ -17,8 +17,16 @@
     TimeRemainingColumn,
 )
 
-
-def main(
+@click.command()
+@click.option("--input-path", type=click.Path(path_type=Path), help="Path to the input file.")
+@click.option("--output-path", type=click.Path(path_type=Path), help="Path to the output directory.")
+@click.option("--single-window", is_flag=True, help="Use single window.")
+@click.option("--batch-size", type=int, help="Batch size.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+@click.option("--threads", type=int, help="Number of threads to use.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+@click.option("--cleanup", is_flag=True, help="Remove temporary files.")
+def nn_classification(
     input_path,
     output_path,
     single_window,
@@ -28,6 +36,9 @@ def main(
     verbose,
     cleanup,
 ):
+    main(input_path, output_path, single_window, batch_size, restart, threads, verbose, cleanup)
+
+def main(input_path, output_path, single_window, batch_size, restart, threads, verbose, cleanup):
     # To avoid having other modules lagging due to the slow TensorFlow import,
     # the `tensorflow` and `genomad.neural_network` modules are loaded inside `main`.
     # Additionally, the following functions that use the `tensorflow` module are

diff --git a/genomad/modules/score_calibration.py b/genomad/modules/score_calibration.py
@@ -1,6 +1,7 @@
 import sys
 from collections import Counter
-
+import rich_click as click
+from pathlib import Path
 import numpy as np
 from genomad import utils
 from genomad._paths import GenomadData, GenomadOutputs
@@ -49,6 +50,14 @@ def write_score_output(output_path, name_array, score_array):
         for n, (c_score, p_score, v_score) in zip(name_array, score_array):
             fout.write(f"{n}\t{c_score:.4f}\t{p_score:.4f}\t{v_score:.4f}\n")
 
+# Option names are hyphenated to match the other module commands; the original
+# underscore spellings are kept as aliases so existing invocations still work.
+# Click derives the parameter name from the first long name (`force_auto`, ...).
+@click.command()
+@click.option("--input-path", "--input_path", type=click.Path(path_type=Path), help="Path to the input FASTA file.")
+# Required: `main` starts by creating `output_path` if it does not exist.
+@click.option("--output-path", "--output_path", type=click.Path(path_type=Path), required=True, help="Path to the output directory.")
+@click.option("--composition", type=str, help="Composition to use for the score calibration.")
+@click.option("--force-auto", "--force_auto", is_flag=True, help="Force the use of the auto composition.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+def score_calibration(input_path, output_path, composition, force_auto, verbose):
+    """Run the score-calibration module."""
+    # Thin CLI wrapper: all work happens in this module's `main`.
+    main(input_path, output_path, composition, force_auto, verbose)
 
 def main(input_path, output_path, composition, force_auto, verbose):
     # Create `output_path` if it does not exist

diff --git a/genomad/modules/summary.py b/genomad/modules/summary.py
@@ -1,7 +1,8 @@
 import itertools
 import sys
 from collections import defaultdict
-
+import rich_click as click
+from pathlib import Path
 import numpy as np
 from genomad import sequence, utils
 from genomad._paths import GenomadOutputs
@@ -102,6 +103,24 @@ def flag_sequences(
         fdr_array[fdr_array < max_fdr],
     )
 
+# Option names are hyphenated to match the other module commands; the original
+# underscore spellings are kept as aliases so existing invocations still work.
+# Click derives each parameter name from the first long name (`min_score`, ...).
+@click.command()
+@click.option("--input-path", "--input_path", type=click.Path(path_type=Path), help="Path to the input FASTA file.")
+@click.option("--output-path", "--output_path", type=click.Path(path_type=Path), help="Path to the output directory.")
+@click.option("--verbose", is_flag=True, help="Enable verbose output.")
+@click.option("--min-score", "--min_score", type=float, help="Minimum score to use for the summary.")
+@click.option("--max-fdr", "--max_fdr", type=float, help="Maximum FDR to use for the summary.")
+@click.option("--min-number-genes", "--min_number_genes", type=int, help="Minimum number of genes to use for the summary.")
+@click.option("--min-plasmid-marker-enrichment", "--min_plasmid_marker_enrichment", type=float, help="Minimum plasmid marker enrichment to use for the summary.")
+@click.option("--min-virus-marker-enrichment", "--min_virus_marker_enrichment", type=float, help="Minimum virus marker enrichment to use for the summary.")
+@click.option("--min-plasmid-hallmarks", "--min_plasmid_hallmarks", type=int, help="Minimum plasmid hallmarks to use for the summary.")
+@click.option("--min-plasmid-hallmarks-short-seqs", "--min_plasmid_hallmarks_short_seqs", type=int, help="Minimum plasmid hallmarks for short sequences to use for the summary.")
+@click.option("--min-virus-hallmarks", "--min_virus_hallmarks", type=int, help="Minimum virus hallmarks to use for the summary.")
+@click.option("--min-virus-hallmarks-short-seqs", "--min_virus_hallmarks_short_seqs", type=int, help="Minimum virus hallmarks for short sequences to use for the summary.")
+@click.option("--max-uscg", "--max_uscg", type=int, help="Maximum USCGs to use for the summary.")
+@click.option("--restart", is_flag=True, help="Restart the execution of the module.")
+def summary(
+    input_path,
+    output_path,
+    verbose,
+    min_score,
+    max_fdr,
+    min_number_genes,
+    min_plasmid_marker_enrichment,
+    min_virus_marker_enrichment,
+    min_plasmid_hallmarks,
+    min_plasmid_hallmarks_short_seqs,
+    min_virus_hallmarks,
+    min_virus_hallmarks_short_seqs,
+    max_uscg,
+    restart,
+):
+    """Run the summary module."""
+    # Arguments are forwarded positionally, in the same order as the original
+    # wrapper, because the full signature of `main` is not restated here.
+    main(
+        input_path,
+        output_path,
+        verbose,
+        min_score,
+        max_fdr,
+        min_number_genes,
+        min_plasmid_marker_enrichment,
+        min_virus_marker_enrichment,
+        min_plasmid_hallmarks,
+        min_plasmid_hallmarks_short_seqs,
+        min_virus_hallmarks,
+        min_virus_hallmarks_short_seqs,
+        max_uscg,
+        restart,
+    )
+
 
 def main(
     input_path,