diff --git a/.github/ISSUE_TEMPLATE/bug-error-report.md b/.github/ISSUE_TEMPLATE/bug-error-report.md index 80e3d5b..a0d3edc 100644 --- a/.github/ISSUE_TEMPLATE/bug-error-report.md +++ b/.github/ISSUE_TEMPLATE/bug-error-report.md @@ -10,6 +10,8 @@ assignees: '' **Describe the bug** A clear and concise description of what the bug is. +> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please, only report them as bugs if you are quite certain that they are not behaving as expected. + **Steps to reproduce the behavior** 1. Command run when the error occurred: diff --git a/.github/ISSUE_TEMPLATE/no-questions-here-please-.md b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md new file mode 100644 index 0000000..ebfef28 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/no-questions-here-please-.md @@ -0,0 +1,14 @@ +--- +name: No questions here please! +about: Questions about the results, design, or run strategy for pySCENIC +title: "[results]" +labels: question +assignees: '' + +--- + +For **questions** about using SCENIC, please use the Discussions: https://github.com/aertslab/SCENIC/discussions/ + +Create an issue only to report **bugs**. + +> Note that most *errors* are due to the input from the user, and therefore should be treated as questions in the Discussions. Please, only report them as bugs if you are quite certain that they are not behaving as expected. 
diff --git a/.github/ISSUE_TEMPLATE/results-design-questions.md b/.github/ISSUE_TEMPLATE/results-design-questions.md deleted file mode 100644 index f99bb49..0000000 --- a/.github/ISSUE_TEMPLATE/results-design-questions.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -name: Results/design questions -about: Questions about the results, design, or run strategy for pySCENIC -title: "[results]" -labels: results -assignees: '' - ---- - - diff --git a/README.rst b/README.rst index 8ebc367..22d5b7b 100644 --- a/README.rst +++ b/README.rst @@ -113,6 +113,7 @@ Additional resources For more information, please visit LCB_, the main `SCENIC website `_, or `SCENIC (R version) `_. +There is a tutorial to `create new cisTarget databases `_. The CLI to pySCENIC has also been streamlined into a pipeline that can be run with a single command, using the Nextflow workflow manager. There are two Nextflow implementations available: diff --git a/src/pyscenic/cli/pyscenic.py b/src/pyscenic/cli/pyscenic.py index 72c1047..7cf80c8 100644 --- a/src/pyscenic/cli/pyscenic.py +++ b/src/pyscenic/cli/pyscenic.py @@ -228,6 +228,7 @@ def prune_targets_command(args): client_or_address=args.mode, module_chunksize=args.chunk_size, num_workers=args.num_workers, + frac_mapping_module=args.frac_mapping_module ) LOGGER.info("Writing results to file.") @@ -372,6 +373,9 @@ def add_module_parameters(parser): group.add_argument( '--min_genes', type=int, default=20, help='The minimum number of genes in a module (default: 20).' 
) + group.add_argument( + '--frac_mapping_module', type=float, default=0.8, help='Minimum fraction of genes per module needed to be annotated in the database (default: 0.2)' + ) group.add_argument( '--expression_mtx_fname', type=argparse.FileType('r'), diff --git a/src/pyscenic/prune.py b/src/pyscenic/prune.py index 873bf5e..d43f2ad 100644 --- a/src/pyscenic/prune.py +++ b/src/pyscenic/prune.py @@ -358,6 +358,7 @@ def prune2df( num_workers=None, module_chunksize=100, filter_for_annotation=True, + frac_mapping_module=0.8 ) -> pd.DataFrame: """ Calculate all regulons for a given sequence of ranking databases and a sequence of co-expression modules. @@ -391,7 +392,7 @@ def prune2df( filter_for_annotation=filter_for_annotation, ) transformation_func = partial( - modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery + modules2df, module2features_func=module2features_func, weighted_recovery=weighted_recovery, frac_mapping_module=frac_mapping_module ) # Create a distributed dataframe from individual delayed objects to avoid out of memory problems. aggregation_func = ( diff --git a/src/pyscenic/transform.py b/src/pyscenic/transform.py index 96faae7..e2e940a 100644 --- a/src/pyscenic/transform.py +++ b/src/pyscenic/transform.py @@ -223,6 +223,7 @@ def module2df( weighted_recovery=False, return_recovery_curves=False, module2features_func=module2features, + frac_mapping_module=0.8, ) -> pd.DataFrame: """ """ # Derive enriched and TF-annotated features for module. @@ -241,9 +242,9 @@ def module2df( # If less than 80% of the genes are mapped to the ranking database, the module is skipped. n_missing = len(module) - len(genes) frac_missing = float(n_missing) / len(module) - if frac_missing >= 0.20: + if frac_missing >= (1-frac_mapping_module): LOGGER.warning( - "Less than 80% of the genes in {} could be mapped to {}. Skipping this module.".format(module.name, db.name) + "Less than {}% of the genes in {} could be mapped to {}. 
Skipping this module.".format(frac_mapping_module*100,module.name, db.name) ) return DF_META_DATA @@ -293,12 +294,13 @@ def modules2df( weighted_recovery=False, return_recovery_curves=False, module2features_func=module2features, + frac_mapping_module=0.8, ) -> pd.DataFrame: # Make sure return recovery curves is always set to false because the metadata for the distributed dataframe needs # to be fixed for the dask framework. # TODO: Remove this restriction. return pd.concat( - [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func) for module in modules] + [module2df(db, module, motif_annotations, weighted_recovery, False, module2features_func, frac_mapping_module) for module in modules] ) @@ -443,6 +445,7 @@ def module2regulon( weighted_recovery=weighted_recovery, return_recovery_curves=return_recovery_curves, module2features_func=module2features_func, + frac_mapping_module=0.8, ) if len(df) == 0: return None @@ -467,5 +470,6 @@ def modules2regulons( weighted_recovery=weighted_recovery, return_recovery_curves=return_recovery_curves, module2features_func=module2features_func, + frac_mapping_module=0.8, ) return [] if len(df) == 0 else df2regulons(df)