From 9dda2b65a9fc02dab656b9952f9755961ebbb630 Mon Sep 17 00:00:00 2001 From: Jayoung Ryu Date: Fri, 3 May 2024 15:42:08 -0400 Subject: [PATCH] update no edit documentation and the confidence adjustment with negative controls --- bean/model/parser.py | 2 +- bean/model/run.py | 9 ++-- docs/_tutorial_gwas.md | 20 ++++---- docs/_tutorial_no_edit.md | 101 +++----------------------------------- docs/tutorial_no_edit.rst | 6 +-- 5 files changed, 26 insertions(+), 112 deletions(-) diff --git a/bean/model/parser.py b/bean/model/parser.py index e89ab15..1955731 100755 --- a/bean/model/parser.py +++ b/bean/model/parser.py @@ -209,7 +209,7 @@ def parse_args(parser=None): action="store_true", ) parser.add_argument( - "--no-negative-control", + "--dont-adjust-confidence-by-negative-control", action="store_true", help="Do not adjust confidence by negative controls. Without this flag, variant mode will use negative control variants, and tiling mode will use the synonymous variants to adjust confidence of the result.", ) diff --git a/bean/model/run.py b/bean/model/run.py index e24fd08..a1cad81 100755 --- a/bean/model/run.py +++ b/bean/model/run.py @@ -27,9 +27,6 @@ def check_args(args, bdata): - args.adjust_confidence_by_negative_control = ( - not args.no_negative_control - ) if args.scale_by_acc: if args.acc_col is None and args.acc_bw_path is None: raise ValueError( @@ -42,10 +39,12 @@ def check_args(args, bdata): args.acc_bw_path = None if args.outdir is None: args.outdir = os.path.dirname(args.bdata_path) + if args.fit_negctrl and (args.negctrl_col not in bdata.guides.columns): + raise ValueError(f"--negctrl-col argument '{args.negctrl_col}' not in ReporterScreen.guides.columns {bdata.guides.columns}. Please check the input or do not provide --fit-negctrl flag if you don't have the negative controls.") if args.library_design == "variant": - if args.adjust_confidence_by_negative_control and (args.negctrl_col not in bdata.guides.columns): - raise ValueError(f"--negctrl-col argument '{args.negctrl_col}' not in ReporterScreen.guides.columns {bdata.guides.columns}. Please check the input or provide --no-negative-control flag if you don't have the negative controls.") + args.adjust_confidence_by_negative_control = args.fit_negctrl and (not args.dont_adjust_confidence_by_negative_control) elif args.library_design == "tiling": + args.adjust_confidence_by_negative_control = (not args.dont_adjust_confidence_by_negative_control) if args.allele_df_key is None: key_to_use = "allele_counts" n_alleles = len(bdata.uns[key_to_use]) diff --git a/docs/_tutorial_gwas.md b/docs/_tutorial_gwas.md index f81f714..64229bc 100755 --- a/docs/_tutorial_gwas.md +++ b/docs/_tutorial_gwas.md @@ -20,7 +20,7 @@ screen_id=my_sorting_tiling_screen working_dir=my_workdir # 1. Count gRNA & reporter -bean-count-samples \ +bean count-samples \ --input ${working_dir}//sample_list.csv `# Contains fastq file path; see test file for example.`\ -b A `# Base A is edited (into G)` \ -f ${working_dir}/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ @@ -29,14 +29,14 @@ bean-count-samples \ -n ${screen_id} `# ID of the screen to be counted` # 2. QC samples & guides -bean-qc \ +bean qc \ ${working_dir}/bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ -o ${working_dir}/bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ -r ${working_dir}/qc_report_${screen_id} `# Prefix for QC report` \ -b ` # Remove replicates with no good samples. # 3. Quantify variant effect -bean-run sorting variant \ +bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ --fit-negctrl \ @@ -51,7 +51,7 @@ See more details below. screen_id=my_sorting_tiling_screen # 1. Count gRNA & reporter -bean-count-samples \ +bean count-samples \ --input ${working_dir}/sample_list.csv `# Contains fastq file path; see test file for example.`\ -b A `# Base A is edited (into G)` \ -f ${working_dir}/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ @@ -66,7 +66,7 @@ Make sure you follow the [input file format](https://pinellolab.github.io/crispr Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, beware to follow the [input file guideline](https://pinellolab.github.io/crispr-bean/input.html), but you can change the parameters with the full argument list of [bean qc](https://pinellolab.github.io/crispr-bean/qc.html). (Common factors you may want to tweak is `--ctrl-cond=bulk` and `--lfc-conds=top,bot` if you have different sample condition labels.) ```bash -bean-qc \ +bean qc \ bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ -r qc_report_${screen_id} `# Prefix for QC report` @@ -79,11 +79,11 @@ If the data does not include reporter editing data, you can provide `--no-editin ## 3. Quantify variant effect (:ref:`run`) -`bean-run` can take 3 run options to quantify editing rate: +`bean run` can take 3 run options to quantify editing rate: 1. From **reporter + accessibility** If your gRNA metadata table (`${working_dir}/test_guide_info.csv` above) included per-gRNA accessibility score, ```bash - bean-run sorting variant \ + bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ --fit-negctrl \ @@ -94,7 +94,7 @@ If the data does not include reporter editing data, you can provide `--no-editin If your gRNA metadata table (`${working_dir}/test_guide_info.csv` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal, ```bash - bean-run sorting variant \ + bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ --fit-negctrl \ @@ -107,7 +107,7 @@ If the data does not include reporter editing data, you can provide `--no-editin This assumes the all target sites have the uniform chromatin accessibility. ```bash - bean-run sorting variant \ + bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ --fit-negctrl @@ -117,7 +117,7 @@ If the data does not include reporter editing data, you can provide `--no-editin Use this option if your data don't have editing outcome information. ```bash - bean-run sorting variant \ + bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ --fit-negctrl \ diff --git a/docs/_tutorial_no_edit.md b/docs/_tutorial_no_edit.md index 32e83dc..05656ae 100644 --- a/docs/_tutorial_no_edit.md +++ b/docs/_tutorial_no_edit.md @@ -17,110 +17,25 @@ Here, we consider an example where BEAN uses the **external count** without repo ## Example workflow ```bash -screen_id=my_sorting_tiling_screen +screen_id=my_sorting_screen working_dir=my_workdir -# 1. Count gRNA & reporter -bean-count-samples \ ---input ${working_dir}//sample_list.csv `# Contains fastq file path; see test file for example.`\ --b A `# Base A is edited (into G)` \ --f ${working_dir}/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ --o ./ `# Output directory` \ --r `# Quantify reporter edits` \ --n ${screen_id} `# ID of the screen to be counted` +# 1. Given that we have gRNA count for each sample, generate ReporterScreen (.h5ad) object for downstream analysis. +bean create-screen ${working_dir}/gRNA_info.csv ${working_dir}/sample_list.csv ${working_dir}/gRNA_counts.csv -o ${working_dir}/bean_count_${screen_id} # 2. QC samples & guides -bean-qc \ +bean qc \ ${working_dir}/bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ -o ${working_dir}/bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ -r ${working_dir}/qc_report_${screen_id} `# Prefix for QC report` \ -b ` # Remove replicates with no good samples. # 3. Quantify variant effect -bean-run sorting variant \ +bean run sorting variant \ ${working_dir}/bean_count_${screen_id}_masked.h5ad \ -o ${working_dir}/ \ - --fit-negctrl \ - --scale-by-acc \ - --accessibility-col accessibility + --uniform-edit `# As we have no edit information.` \ + [--no-negative-control] `# If you don't have the negative control gRNAs.` ``` -See more details below. - -## 1. Count gRNA & reporter (:ref:`count_samples`) -```bash -screen_id=my_sorting_tiling_screen - -# 1. Count gRNA & reporter -bean-count-samples \ ---input ${working_dir}/sample_list.csv `# Contains fastq file path; see test file for example.`\ --b A `# Base A is edited (into G)` \ --f ${working_dir}/test_guide_info.csv `# Contains gRNA metadata; see test file for example.`\ --o ./ `# Output directory` \ --r `# Quantify reporter edits` \ --n ${screen_id} `# ID of the screen to be counted` -``` - -Make sure you follow the [input file format](https://pinellolab.github.io/crispr-bean/input.html) for seamless downstream steps. This will produce `./bean_count_${screen_id}.h5ad`. - -## 2. QC samples & guides (:ref:`qc`) -Base editing data will include QC about editing efficiency. As QC uses predefined column names and values, beware to follow the [input file guideline](https://pinellolab.github.io/crispr-bean/input.html), but you can change the parameters with the full argument list of [bean qc](https://pinellolab.github.io/crispr-bean/qc.html). (Common factors you may want to tweak is `--ctrl-cond=bulk` and `--lfc-conds=top,bot` if you have different sample condition labels.) - -```bash -bean-qc \ - bean_count_${screen_id}.h5ad `# Input ReporterScreen .h5ad file path` \ - -o bean_count_${screen_id}_masked.h5ad `# Output ReporterScreen .h5ad file path` \ - -r qc_report_${screen_id} `# Prefix for QC report` -``` - - - -If the data does not include reporter editing data, you can provide `--no-editing` flag to omit the editing rate QC. - - -## 3. Quantify variant effect (:ref:`run`) - -`bean-run` can take 3 run options to quantify editing rate: -1. From **reporter + accessibility** - If your gRNA metadata table (`${working_dir}/test_guide_info.csv` above) included per-gRNA accessibility score, - ```bash - bean-run sorting variant \ - ${working_dir}/bean_count_${screen_id}_masked.h5ad \ - -o ${working_dir}/ \ - --fit-negctrl \ - --scale-by-acc \ - --accessibility-col accessibility - ``` - - If your gRNA metadata table (`${working_dir}/test_guide_info.csv` above) included per-gRNA chromosome & position and you have bigWig file with accessibility signal, - - ```bash - bean-run sorting variant \ - ${working_dir}/bean_count_${screen_id}_masked.h5ad \ - -o ${working_dir}/ \ - --fit-negctrl \ - --scale-by-acc \ - --accessibility-bw accessibility.bw - ``` - -2. From **reporter**, without accessibility - - This assumes the all target sites have the uniform chromatin accessibility. - - ```bash - bean-run sorting variant \ - ${working_dir}/bean_count_${screen_id}_masked.h5ad \ - -o ${working_dir}/ \ - --fit-negctrl - ``` - -3. No reporter information, assume the same editing efficiency of all gRNAs. - Use this option if your data don't have editing outcome information. - - ```bash - bean-run sorting variant \ - ${working_dir}/bean_count_${screen_id}_masked.h5ad \ - -o ${working_dir}/ \ - --fit-negctrl \ - --uniform-edit - ``` \ No newline at end of file +See the example input files [here](https://pinellolab.github.io/crispr-bean/create_screen.html). \ No newline at end of file diff --git a/docs/tutorial_no_edit.rst b/docs/tutorial_no_edit.rst index 179c814..c256b96 100644 --- a/docs/tutorial_no_edit.rst +++ b/docs/tutorial_no_edit.rst @@ -1,7 +1,7 @@ -.. _prolif_gwas: +.. _tutorial_no_edit: -Proliferation screen with GWAS library +Screen analysis without reporter edits ********************************************** -.. mdinclude:: _prolif_gwas.md +.. mdinclude:: _tutorial_no_edit.md See :ref:`subcommands` for the full details.