From 79b97eddafcc554117488b31e9a4432fba863a2a Mon Sep 17 00:00:00 2001
From: Chris Flerin <ccflerin@gmail.com>
Date: Wed, 7 Apr 2021 12:27:08 +0200
Subject: [PATCH 1/8] Updates to readme

---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 7b43f1d..6a5ea04 100644
--- a/README.rst
+++ b/README.rst
@@ -88,7 +88,8 @@ All the functionality of the original R implementation is available and in addit
 Additional resources
 --------------------
 
-For more information, please visit LCB_, or SCENIC_ (R version).
+For more information, please visit the main SCENIC_ website.
+There is a tutorial to `create new cisTarget databases <https://github.com/aertslab/create_cisTarget_databases>`_.
 The CLI to pySCENIC has also been streamlined into a pipeline that can be run with a single command, using the Nextflow workflow manager.
 There are two Nextflow implementations available:
 
@@ -124,7 +125,6 @@ References
 .. _dask: https://dask.pydata.org/en/latest/
 .. _distributed: https://distributed.readthedocs.io/en/latest/
 .. _arboreto: https://arboreto.readthedocs.io
-.. _LCB: https://aertslab.org
 .. _`SCENICprotocol`: https://github.com/aertslab/SCENICprotocol
 .. _`VSNPipelines`: https://github.com/vib-singlecell-nf/vsn-pipelines
 .. _notebooks: https://github.com/aertslab/pySCENIC/tree/master/notebooks

From c66455cdc0d736d67963b218a8ebce61dc4eef82 Mon Sep 17 00:00:00 2001
From: Chris Flerin <ccflerin@gmail.com>
Date: Fri, 7 May 2021 21:27:00 +0200
Subject: [PATCH 2/8] Fix missing link in readme

---
 README.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.rst b/README.rst
index 450cc11..22d5b7b 100644
--- a/README.rst
+++ b/README.rst
@@ -148,6 +148,7 @@ References
 .. _dask: https://dask.pydata.org/en/latest/
 .. _distributed: https://distributed.readthedocs.io/en/latest/
 .. _arboreto: https://arboreto.readthedocs.io
+.. _LCB: https://aertslab.org
 .. _`SCENICprotocol`: https://github.com/aertslab/SCENICprotocol
 .. _`VSNPipelines`: https://github.com/vib-singlecell-nf/vsn-pipelines
 .. _notebooks: https://github.com/aertslab/pySCENIC/tree/master/notebooks

From 5569fb6c8a627ae915a328c9afe05831f259ebdf Mon Sep 17 00:00:00 2001
From: Sara Aibar <20438544+s-aibar@users.noreply.github.com>
Date: Mon, 11 Apr 2022 14:21:46 +0200
Subject: [PATCH 3/8] Update issue/question template

---
 .github/ISSUE_TEMPLATE/no-questions-here-please-.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/no-questions-here-please-.md

diff --git a/.github/ISSUE_TEMPLATE/no-questions-here-please-.md b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
new file mode 100644
index 0000000..9b8b6be
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
@@ -0,0 +1,12 @@
+---
+name: No questions here please!
+about: Questions about the results, design, or run strategy for pySCENIC
+title: "[results]"
+labels: question
+assignees: ''
+
+---
+
+For **questions** about using SCENIC, please use the Discussions: https://github.com/aertslab/SCENIC/discussions/
+
+Create an issue only to report **bugs** unexpected errors or feature requests.

From ba06f54c1cf2acd82ddac3d1c7b1da756a46d9b3 Mon Sep 17 00:00:00 2001
From: Sara Aibar <20438544+s-aibar@users.noreply.github.com>
Date: Tue, 12 Apr 2022 09:43:37 +0200
Subject: [PATCH 4/8] Update issue templates

---
 .github/ISSUE_TEMPLATE/bug-error-report.md          |  2 ++
 .github/ISSUE_TEMPLATE/no-questions-here-please-.md |  4 +++-
 .github/ISSUE_TEMPLATE/results-design-questions.md  | 10 ----------
 3 files changed, 5 insertions(+), 11 deletions(-)
 delete mode 100644 .github/ISSUE_TEMPLATE/results-design-questions.md

diff --git a/.github/ISSUE_TEMPLATE/bug-error-report.md b/.github/ISSUE_TEMPLATE/bug-error-report.md
index 80e3d5b..a0d3edc 100644
--- a/.github/ISSUE_TEMPLATE/bug-error-report.md
+++ b/.github/ISSUE_TEMPLATE/bug-error-report.md
@@ -10,6 +10,8 @@ assignees: ''
 **Describe the bug**
 A clear and concise description of what the bug is.
 
+> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please only report them as bugs if you are quite certain that they are not behaving as expected.
+
 **Steps to reproduce the behavior**
 1. Command run when the error occurred:
 <!-- Please specify the command used (if applicable, otherwise delete this block): -->
diff --git a/.github/ISSUE_TEMPLATE/no-questions-here-please-.md b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
index 9b8b6be..ebfef28 100644
--- a/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
+++ b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md
@@ -9,4 +9,6 @@ assignees: ''
 
 For **questions** about using SCENIC, please use the Discussions: https://github.com/aertslab/SCENIC/discussions/
 
-Create an issue only to report **bugs** unexpected errors or feature requests.
+Create an issue only to report **bugs**.
+
+> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please only report them as bugs if you are quite certain that they are not behaving as expected.
diff --git a/.github/ISSUE_TEMPLATE/results-design-questions.md b/.github/ISSUE_TEMPLATE/results-design-questions.md
deleted file mode 100644
index f99bb49..0000000
--- a/.github/ISSUE_TEMPLATE/results-design-questions.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-name: Results/design questions
-about: Questions about the results, design, or run strategy for pySCENIC
-title: "[results]"
-labels: results
-assignees: ''
-
----
-
-<!-- If you have questions about results, design, or strategy for running pySCENIC, please ask here -->

From c14fe15027e211a70f65e7310797fae3dd3d1e2e Mon Sep 17 00:00:00 2001
From: Kevin Leiss <kevin.leiss@icloud.com>
Date: Tue, 12 Apr 2022 12:33:31 +0200
Subject: [PATCH 5/8] added flexible missing gene cutoff

---
 src/pyscenic/cli/pyscenic.py |  4 ++++
 src/pyscenic/prune.py        |  3 ++-
 src/pyscenic/transform.py    | 11 +++++++++--
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py
index 72c1047..f86a138 100644
--- a/src/pyscenic/cli/pyscenic.py
+++ b/src/pyscenic/cli/pyscenic.py
@@ -228,6 +228,7 @@ def prune_targets_command(args):
             client_or_address=args.mode,
             module_chunksize=args.chunk_size,
             num_workers=args.num_workers,
+            frac_mapping_module=args.frac_mapping_module
         )
 
     LOGGER.info("Writing results to file.")
@@ -372,6 +373,9 @@ def add_module_parameters(parser):
     group.add_argument(
         '--min_genes', type=int, default=20, help='The minimum number of genes in a module (default: 20).'
     )
+    group.add_argument(
+        '--frac_mapping_module', type=float, default=0.2, help='Minimum fraction of genes per module needed to be annotated in the database (default: 0.2)'
+    )
     group.add_argument(
         '--expression_mtx_fname',
         type=argparse.FileType('r'),
diff --git a/src/pyscenic/prune.py b/src/pyscenic/prune.py
index 873bf5e..ac31bed 100644
--- a/src/pyscenic/prune.py
+++ b/src/pyscenic/prune.py
@@ -358,6 +358,7 @@ def prune2df(
     num_workers=None,
     module_chunksize=100,
     filter_for_annotation=True,
+    frac_mapping_module=0.2
 ) -> pd.DataFrame:
     """
     Calculate all regulons for a given sequence of ranking databases and a sequence of co-expression modules.
@@ -391,7 +392,7 @@ def prune2df(
         filter_for_annotation=filter_for_annotation,
     )
     transformation_func = partial(
-        modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery
+        modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery, frac_mapping_module=frac_mapping_module
     )
     # Create a distributed dataframe from individual delayed objects to avoid out of memory problems.
     aggregation_func = (
diff --git a/src/pyscenic/transform.py b/src/pyscenic/transform.py
index 96faae7..eaa7f7c 100644
--- a/src/pyscenic/transform.py
+++ b/src/pyscenic/transform.py
@@ -223,6 +223,7 @@ def module2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
+    frac_mapping_module=0.2,
 ) -> pd.DataFrame:
     """ """
     # Derive enriched and TF-annotated features for module.
@@ -241,7 +242,10 @@ def module2df(
     # If less than 80% of the genes are mapped to the ranking database, the module is skipped.
     n_missing = len(module) - len(genes)
     frac_missing = float(n_missing) / len(module)
-    if frac_missing >= 0.20:
+    if frac_missing >= frac_mapping_module:
+        LOGGER.warning(
+            "Mapping fraction set to {}".format(frac_mapping_module)
+        )
         LOGGER.warning(
             "Less than 80% of the genes in {} could be mapped to {}. Skipping this module.".format(module.name, db.name)
         )
@@ -293,12 +297,13 @@ def modules2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
+    frac_mapping_module=0.2,
 ) -> pd.DataFrame:
     # Make sure return recovery curves is always set to false because the metadata for the distributed dataframe needs
     # to be fixed for the dask framework.
     # TODO: Remove this restriction.
     return pd.concat(
-        [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func) for module in modules]
+        [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func, frac_mapping_module) for module in modules]
     )
 
 
@@ -443,6 +448,7 @@ def module2regulon(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
+        frac_mapping_module=0.2,
     )
     if len(df) == 0:
         return None
@@ -467,5 +473,6 @@ def modules2regulons(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
+        frac_mapping_module=0.2,
     )
     return [] if len(df) == 0 else df2regulons(df)

From 746e7cccd0ab56ab72d8ac5d721d06feae252ca9 Mon Sep 17 00:00:00 2001
From: Kevin Leiss <kevin.leiss@icloud.com>
Date: Tue, 12 Apr 2022 15:56:26 +0200
Subject: [PATCH 6/8] added CLI argument for nGenes per module

---
 src/pyscenic/cli/pyscenic.py |  2 +-
 src/pyscenic/transform.py    | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py
index f86a138..7cf80c8 100644
--- a/src/pyscenic/cli/pyscenic.py
+++ b/src/pyscenic/cli/pyscenic.py
@@ -374,7 +374,7 @@ def add_module_parameters(parser):
         '--min_genes', type=int, default=20, help='The minimum number of genes in a module (default: 20).'
     )
     group.add_argument(
-        '--frac_mapping_module', type=float, default=0.2, help='Minimum fraction of genes per module needed to be annotated in the database (default: 0.2)'
+        '--frac_mapping_module', type=float, default=0.8, help='Minimum fraction of genes per module needed to be annotated in the database (default: 0.8)'
     )
     group.add_argument(
         '--expression_mtx_fname',
diff --git a/src/pyscenic/transform.py b/src/pyscenic/transform.py
index eaa7f7c..515eae5 100644
--- a/src/pyscenic/transform.py
+++ b/src/pyscenic/transform.py
@@ -223,7 +223,7 @@ def module2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
-    frac_mapping_module=0.2,
+    frac_mapping_module=0.8,
 ) -> pd.DataFrame:
     """ """
     # Derive enriched and TF-annotated features for module.
@@ -242,12 +242,9 @@ def module2df(
     # If less than 80% of the genes are mapped to the ranking database, the module is skipped.
     n_missing = len(module) - len(genes)
     frac_missing = float(n_missing) / len(module)
-    if frac_missing >= frac_mapping_module:
+    if frac_missing >= (1-frac_mapping_module):
         LOGGER.warning(
-            "Mapping fraction set to {}".format(frac_mapping_module)
-        )
-        LOGGER.warning(
-            "Less than 80% of the genes in {} could be mapped to {}. Skipping this module.".format(module.name, db.name)
+            "Less than {}% of the genes in {} could be mapped to {}. Skipping this module.".format((1-frac_mapping_module)*10,module.name, db.name)
         )
         return DF_META_DATA
 
@@ -297,7 +294,7 @@ def modules2df(
     weighted_recovery=False,
     return_recovery_curves=False,
     module2features_func=module2features,
-    frac_mapping_module=0.2,
+    frac_mapping_module=0.8,
 ) -> pd.DataFrame:
     # Make sure return recovery curves is always set to false because the metadata for the distributed dataframe needs
     # to be fixed for the dask framework.
@@ -448,7 +445,7 @@ def module2regulon(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
-        frac_mapping_module=0.2,
+        frac_mapping_module=0.8,
     )
     if len(df) == 0:
         return None
@@ -473,6 +470,6 @@ def modules2regulons(
         weighted_recovery=weighted_recovery,
         return_recovery_curves=return_recovery_curves,
         module2features_func=module2features_func,
-        frac_mapping_module=0.2,
+        frac_mapping_module=0.8,
     )
     return [] if len(df) == 0 else df2regulons(df)

From a3fd51da7ba58eb1329ed334a0d9d4a31500fc15 Mon Sep 17 00:00:00 2001
From: Kevin Leiss <kevin.leiss@icloud.com>
Date: Tue, 12 Apr 2022 16:51:49 +0200
Subject: [PATCH 7/8] fix typo

---
 src/pyscenic/transform.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pyscenic/transform.py b/src/pyscenic/transform.py
index 515eae5..e2e940a 100644
--- a/src/pyscenic/transform.py
+++ b/src/pyscenic/transform.py
@@ -244,7 +244,7 @@ def module2df(
     frac_missing = float(n_missing) / len(module)
     if frac_missing >= (1-frac_mapping_module):
         LOGGER.warning(
-            "Less than {}% of the genes in {} could be mapped to {}. Skipping this module.".format((1-frac_mapping_module)*10,module.name, db.name)
+            "Less than {}% of the genes in {} could be mapped to {}. Skipping this module.".format(frac_mapping_module*100,module.name, db.name)
         )
         return DF_META_DATA
 

From a7be7e4f9a84eebc376ebd73f8ed3a5d010d911f Mon Sep 17 00:00:00 2001
From: Kevin Leiss <kevin.leiss@icloud.com>
Date: Tue, 12 Apr 2022 17:30:56 +0200
Subject: [PATCH 8/8] set frac_mapping_module in prune to default

---
 src/pyscenic/prune.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pyscenic/prune.py b/src/pyscenic/prune.py
index ac31bed..d43f2ad 100644
--- a/src/pyscenic/prune.py
+++ b/src/pyscenic/prune.py
@@ -358,7 +358,7 @@ def prune2df(
     num_workers=None,
     module_chunksize=100,
     filter_for_annotation=True,
-    frac_mapping_module=0.2
+    frac_mapping_module=0.8
 ) -> pd.DataFrame:
     """
     Calculate all regulons for a given sequence of ranking databases and a sequence of co-expression modules.