aertslab · klprint · Apr 7, 2021 · Apr 16, 2021 · May 5, 2021 · May 7, 2021
diff --git a/.github/ISSUE_TEMPLATE/bug-error-report.md b/.github/ISSUE_TEMPLATE/bug-error-report.md
@@ -10,6 +10,8 @@ assignees: ''
 **Describe the bug**
 A clear and concise description of what the bug is.
 
+> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please only report them as bugs if you are quite certain that the software is not behaving as expected.
+
 **Steps to reproduce the behavior**
 1. Command run when the error occurred:
 <!-- Please specify the command used (if applicable, otherwise delete this block): -->

diff --git a/.github/ISSUE_TEMPLATE/no-questions-here-please-.md b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
@@ -0,0 +1,14 @@
+---
+name: No questions here please!
+about: Questions about the results, design, or run strategy for pySCENIC
+title: "[results]"
+labels: question
+assignees: ''
+
+---
+
+For **questions** about using SCENIC, please use the Discussions: https://github.com/aertslab/SCENIC/discussions/
+
+Create an issue only to report **bugs**.
+
+> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please only report them as bugs if you are quite certain that the software is not behaving as expected.
diff --git a/.github/ISSUE_TEMPLATE/results-design-questions.md b/.github/ISSUE_TEMPLATE/results-design-questions.md
diff --git a/README.rst b/README.rst
@@ -113,6 +113,7 @@ Additional resources
 For more information, please visit LCB_, 
 the main `SCENIC website <https://scenic.aertslab.org/>`_,
 or `SCENIC (R version) <https://github.com/aertslab/SCENIC>`_.
+There is a tutorial to `create new cisTarget databases <https://github.com/aertslab/create_cisTarget_databases>`_.
 The CLI to pySCENIC has also been streamlined into a pipeline that can be run with a single command, using the Nextflow workflow manager.
 There are two Nextflow implementations available:
 

diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py
@@ -228,6 +228,7 @@ def prune_targets_command(args):
             client_or_address=args.mode,
             module_chunksize=args.chunk_size,
             num_workers=args.num_workers,
+            frac_mapping_module=args.frac_mapping_module
         )
 
     LOGGER.info("Writing results to file.")
@@ -372,6 +373,9 @@ def add_module_parameters(parser):
     group.add_argument(
         '--min_genes', type=int, default=20, help='The minimum number of genes in a module (default: 20).'
     )
+    group.add_argument(
+        '--frac_mapping_module', type=float, default=0.8, help='Minimum fraction of genes per module needed to be annotated in the database (default: 0.8).'
+    )
     group.add_argument(
         '--expression_mtx_fname',
         type=argparse.FileType('r'),

diff --git a/src/pyscenic/prune.py b/src/pyscenic/prune.py
@@ -358,6 +358,7 @@ def prune2df(
     num_workers=None,
     module_chunksize=100,
     filter_for_annotation=True,
+    frac_mapping_module=0.8
 ) -> pd.DataFrame:
     """
     Calculate all regulons for a given sequence of ranking databases and a sequence of co-expression modules.
@@ -391,7 +392,7 @@ def prune2df(
         filter_for_annotation=filter_for_annotation,
     )
     transformation_func = partial(
-        modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery
+        modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery, frac_mapping_module=frac_mapping_module
     )
     # Create a distributed dataframe from individual delayed objects to avoid out of memory problems.
     aggregation_func = (

diff --git a/src/pyscenic/transform.py b/src/pyscenic/transform.py
@@ -223,6 +223,7 @@ def module2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
+    frac_mapping_module=0.8,
 ) -> pd.DataFrame:
     """ """
     # Derive enriched and TF-annotated features for module.
@@ -241,9 +242,9 @@ def module2df(
     # If less than 80% of the genes are mapped to the ranking database, the module is skipped.
     n_missing = len(module) - len(genes)
     frac_missing = float(n_missing) / len(module)
-    if frac_missing >= 0.20:
+    if frac_missing >= (1-frac_mapping_module):
         LOGGER.warning(
-            "Less than 80% of the genes in {} could be mapped to {}. Skipping this module.".format(module.name, db.name)
+            "Less than {}% of the genes in {} could be mapped to {}. Skipping this module.".format(frac_mapping_module*100,module.name, db.name)
         )
         return DF_META_DATA
 
@@ -293,12 +294,13 @@ def modules2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
+    frac_mapping_module=0.8,
 ) -> pd.DataFrame:
     # Make sure return recovery curves is always set to false because the metadata for the distributed dataframe needs
     # to be fixed for the dask framework.
     # TODO: Remove this restriction.
     return pd.concat(
-        [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func) for module in modules]
+        [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func, frac_mapping_module) for module in modules]
     )
 
 
@@ -443,6 +445,7 @@ def module2regulon(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
+        frac_mapping_module=0.8,
     )
     if len(df) == 0:
         return None
@@ -467,5 +470,6 @@ def modules2regulons(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
+        frac_mapping_module=0.8,
     )
     return [] if len(df) == 0 else df2regulons(df)