From 0226548fdf224a428ea28f5cb775c1a04df547e1 Mon Sep 17 00:00:00 2001 From: Samuel Nichols Date: Mon, 6 May 2024 16:08:00 -0600 Subject: [PATCH 01/16] Sam/try plots (#71) * Fix batch mode pandas warning. (#70) * refactor to call method on DataFrame, rather than Series. Removes warning. * Fix pandas future warning in CRISPRessoWGS --------- Co-authored-by: Cole Lyman * Functional * Cole/fix status file name (#69) * Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. * Fix divide by zero when no amplicons are present in Batch mode * Don't append file_prefix to status file name * Place status files in output directories * Update tests branch for file_prefix addition * Load D3 and plotly figures with pro with multiple amplicons * Update batch * Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix the second run that was compared would not be displayed as a data in the first figure of the report. * Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available * Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import * Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests * Allow for matching of multiple guides in the same amplicon * Fix pandas FutureWarning * Change test branch back to master --------- Co-authored-by: Sam * Try catch all futures * Fix test fail plots * Point test to try-plots * Fix d3 not showing and plotly mixing with matplotlib * Use logger for warnings and debug statements * Point tests back at master --------- Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman --- CRISPResso2/CRISPRessoAggregateCORE.py | 7 ++++++- CRISPResso2/CRISPRessoBatchCORE.py | 7 ++++++- CRISPResso2/CRISPRessoCORE.py | 15 ++++++++++----- CRISPResso2/CRISPRessoMultiProcessing.py | 15 +++++++++++---- .../CRISPRessoReports/CRISPRessoReport.py | 1 - .../templates/shared/partials/fig_reports.html | 16 +++++++++------- 6 files changed, 42 insertions(+), 19 deletions(-) diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index 5cd4cba9..59d8897f 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -869,7 +869,12 @@ def main(): debug('Plot pool results:') for future in process_futures: debug('future: ' + str(future)) - future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future + for future in process_futures: + try: + future.result() + except Exception as e: + logger.warning('Error in plot pool: %s' % e) + logger.debug(traceback.format_exc()) process_pool.shutdown() info('Analysis Complete!', {'percent_complete': 100}) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 4c60e2da..4e387f98 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -904,7 +904,12 @@ def main(): debug('CRISPResso batch results:') for future in process_futures: debug('future: ' + str(future)) - future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future + for future in process_futures: + try: + future.result() + except Exception as e: + logger.warning('Error in plot pool: %s' % e) + logger.debug(traceback.format_exc()) process_pool.shutdown() if not args.suppress_report: diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index a1590604..a3a57b9f 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -3776,11 +3776,11 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ mod_df_for_plot.insert(0, 'Batch', ref_name) plot_root = _jp('2a.'+ ref_plot_name + 'Nucleotide_percentage_quilt') - pro_output_name = f'plot_{os.path.basename(plot_root)}.json' + pro_output_name = os.path.join(OUTPUT_DIRECTORY, f'plot_{os.path.basename(plot_root)}.json') plot_2a_input = { 'nuc_pct_df': nuc_df_for_plot, 'mod_pct_df': mod_df_for_plot, - 'fig_filename_root': f'{_jp(pro_output_name)}' if not args.use_matplotlib and C2PRO_INSTALLED else plot_root, + 'fig_filename_root': pro_output_name if not args.use_matplotlib and C2PRO_INSTALLED else plot_root, 'save_also_png': save_png, 'sgRNA_intervals': sgRNA_intervals, 'sgRNA_names': sgRNA_names, @@ -3824,11 +3824,11 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ for x in include_idxs_list: new_include_idx += [x - new_sel_cols_start] plot_root = _jp('2b.'+ ref_plot_name + 'Nucleotide_percentage_quilt_around_' + sgRNA_label) - pro_output_name = f'plot_{os.path.basename(plot_root)}.json' + pro_output_name = os.path.join(OUTPUT_DIRECTORY, f'plot_{os.path.basename(plot_root)}.json') plot_2b_input = { 'nuc_pct_df': nuc_df_for_plot.iloc[:, sel_cols], 'mod_pct_df': mod_df_for_plot.iloc[:, sel_cols], - 'fig_filename_root': f'{_jp(pro_output_name)}' if not args.use_matplotlib and C2PRO_INSTALLED else plot_root, + 'fig_filename_root': pro_output_name if not args.use_matplotlib and C2PRO_INSTALLED else plot_root, 'save_also_png': save_png, 'sgRNA_intervals': new_sgRNA_intervals, 'sgRNA_names': sgRNA_names, @@ -4893,7 +4893,12 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): debug('Plot pool results:') for future in process_futures: debug('future: ' + str(future)) - future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future + for future in process_futures: + try: + future.result() + except Exception as e: + logger.warning('Error in plot pool: %s' % e) + logger.debug(traceback.format_exc()) process_pool.shutdown() info('Done!') diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py index d9afb3d1..0ea1f813 100644 --- a/CRISPResso2/CRISPRessoMultiProcessing.py +++ b/CRISPResso2/CRISPRessoMultiProcessing.py @@ -14,6 +14,7 @@ from inspect import getmodule, stack import numpy as np import pandas as pd +import traceback def get_max_processes(): return mp.cpu_count() @@ -271,7 +272,13 @@ def run_plot(plot_func, plot_args, num_processes, process_futures, process_pool) ------- None """ - if num_processes > 1: - process_futures[process_pool.submit(plot_func, **plot_args)] = (plot_func, plot_args) - else: - plot_func(**plot_args) + logger = logging.getLogger(getmodule(stack()[1][0]).__name__) + try: + if num_processes > 1: + process_futures[process_pool.submit(plot_func, **plot_args)] = (plot_func, plot_args) + else: + plot_func(**plot_args) + except Exception as e: + logger.warn(f"Plot error {e}, skipping plot \n") + logger.debug(traceback.format_exc()) + diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py index fde5fc1d..8e86084e 100644 --- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py +++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py @@ -588,7 +588,6 @@ def fill_default(dictionary, key, default_type=list): 'datas': [], 'htmls': [], } - for html in sub_html_files: sub_html_files[html] = crispresso_data_path + sub_html_files[html] with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile: diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html index e21c6853..b30c6cdc 100644 --- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html @@ -13,7 +13,7 @@ {%- if amplicon_name is defined -%} {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'][amplicon_name] -%} {{report_data['figures']['htmls'][amplicon_name][fig_name]|safe}} - {%- else -%} + {%- elif fig_name in report_data['figures']['locs'][amplicon_name] -%} {% endif -%} @@ -23,12 +23,14 @@ {%- else %} {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'] -%} {{report_data['figures']['htmls'][fig_name]|safe}} - {%- else -%} + {%- elif fig_name in report_data['figures']['locs'] -%} {% endif -%} - - {%- for (data_label,data_path) in report_data['figures']['datas'][fig_name] %} -

Data: {{data_label}}

- {%- endfor -%} + {% if fig_name in report_data['figures']['captions'] and fig_name in report_data['figures']['datas'] %} + + {%- for (data_label,data_path) in report_data['figures']['datas'][fig_name] %} +

Data: {{data_label}}

+ {%- endfor -%} + {%- endif %} {%- endif %} - + \ No newline at end of file From 3513a6c67caea631b0a5094dbe52f29ae6384aa7 Mon Sep 17 00:00:00 2001 From: Samuel Nichols Date: Mon, 6 May 2024 16:16:33 -0600 Subject: [PATCH 02/16] Sam/fix plots (#72) * Fix batch mode pandas warning. (#70) * refactor to call method on DataFrame, rather than Series. Removes warning. * Fix pandas future warning in CRISPRessoWGS --------- Co-authored-by: Cole Lyman * Functional * Cole/fix status file name (#69) * Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. * Fix divide by zero when no amplicons are present in Batch mode * Don't append file_prefix to status file name * Place status files in output directories * Update tests branch for file_prefix addition * Load D3 and plotly figures with pro with multiple amplicons * Update batch * Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix the second run that was compared would not be displayed as a data in the first figure of the report. * Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available * Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import * Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests * Allow for matching of multiple guides in the same amplicon * Fix pandas FutureWarning * Change test branch back to master --------- Co-authored-by: Sam * Try catch all futures * Fix test fail plots * Fix d3 not showing and plotly mixing with matplotlib --------- Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman From d88094268257eac459aa14e9eeceb5d13ec05909 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Tue, 7 May 2024 10:22:11 -0600 Subject: [PATCH 03/16] Remove token from integration tests file --- .github/workflows/integration_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 7d968708..92a11f39 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -45,7 +45,6 @@ jobs: uses: actions/checkout@v3 with: repository: edilytics/CRISPResso2_tests - token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }} # ref: '' # Use this to specify a branch other than master - name: Run Basic From 64ef72e0b6c92ce8b05f1b7da3706c14ba38dcd8 Mon Sep 17 00:00:00 2001 From: Trevor Martin <60452953+trevormartinj7@users.noreply.github.com> Date: Thu, 9 May 2024 15:08:42 -0600 Subject: [PATCH 04/16] Passing sgRNA sequences to regular and Batch D3 plots (#73) * Sam/try plots (#71) * Fix batch mode pandas warning. (#70) * refactor to call method on DataFrame, rather than Series. Removes warning. * Fix pandas future warning in CRISPRessoWGS --------- Co-authored-by: Cole Lyman * Functional * Cole/fix status file name (#69) * Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. * Fix divide by zero when no amplicons are present in Batch mode * Don't append file_prefix to status file name * Place status files in output directories * Update tests branch for file_prefix addition * Load D3 and plotly figures with pro with multiple amplicons * Update batch * Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix the second run that was compared would not be displayed as a data in the first figure of the report. * Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available * Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import * Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests * Allow for matching of multiple guides in the same amplicon * Fix pandas FutureWarning * Change test branch back to master --------- Co-authored-by: Sam * Try catch all futures * Fix test fail plots * Point test to try-plots * Fix d3 not showing and plotly mixing with matplotlib * Use logger for warnings and debug statements * Point tests back at master --------- Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman * Sam/fix plots (#72) * Fix batch mode pandas warning. (#70) * refactor to call method on DataFrame, rather than Series. Removes warning. * Fix pandas future warning in CRISPRessoWGS --------- Co-authored-by: Cole Lyman * Functional * Cole/fix status file name (#69) * Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. * Fix divide by zero when no amplicons are present in Batch mode * Don't append file_prefix to status file name * Place status files in output directories * Update tests branch for file_prefix addition * Load D3 and plotly figures with pro with multiple amplicons * Update batch * Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix the second run that was compared would not be displayed as a data in the first figure of the report. * Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available * Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import * Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests * Allow for matching of multiple guides in the same amplicon * Fix pandas FutureWarning * Change test branch back to master --------- Co-authored-by: Sam * Try catch all futures * Fix test fail plots * Fix d3 not showing and plotly mixing with matplotlib --------- Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman * Remove token from integration tests file * Provide sgRNA_sequences to plot_nucleotide_quilt plots * Passing sgRNA_sequences to plot * Refactor check for determining when to use CRISPREssoPro or matplotlib for Batch plots * Add max-height to Batch report samples * Change testing branch * Fix wrong check for large Batch plots * Update integration_tests.yml to point back at master --------- Co-authored-by: Samuel Nichols Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman --- CRISPResso2/CRISPRessoBatchCORE.py | 45 +++++++++++++++---- CRISPResso2/CRISPRessoCORE.py | 7 +++ .../templates/batchReport.html | 2 +- .../shared/partials/failed_runs.html | 2 +- tests/unit_tests/test_CRISPRessoBatchCORE.py | 44 ++++++++++++++++++ 5 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 tests/unit_tests/test_CRISPRessoBatchCORE.py diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 4e387f98..f6ca5550 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -49,6 +49,33 @@ def check_library(library_name): np = check_library('numpy') +def should_plot_large_plots(num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff=300): + """Determine if large plots should be plotted. + + Parameters + ---------- + num_rows : int + Number of rows in the dataframe. + c2pro_installed : bool + Whether CRISPRessoPro is installed. + use_matplotlib : bool + Whether to use matplotlib when CRISPRessoPro is installed, i.e. value + of `--use_matplotlib`. + large_plot_cutoff : int, optional + Number of samples at which to not plot large plots with matplotlib. + Note that each sample has 6 rows in the datafame. Defaults to 300. + + Returns + ------- + bool + Whether to plot large plots. + """ + return ( + (not use_matplotlib and c2pro_installed) + or (num_rows / 6) < large_plot_cutoff + ) + + def main(): try: start_time = datetime.now() @@ -395,8 +422,6 @@ def main(): crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {} - large_plot_cutoff = 300 - percent_complete_start, percent_complete_end = 90, 99 if all_amplicons: percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons) @@ -580,7 +605,7 @@ def main(): sub_modification_percentage_summary_filename = _jp(amplicon_plot_name + 'Modification_percentage_summary_around_sgRNA_'+sgRNA+'.txt') sub_modification_percentage_summary_df.to_csv(sub_modification_percentage_summary_filename, sep='\t', index=None) - if not args.suppress_plots and not args.suppress_batch_summary_plots and (nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff: + if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(sub_nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib): # plot for each guide # show all sgRNA's on the plot sub_sgRNA_intervals = [] @@ -614,6 +639,7 @@ def main(): 'fig_filename_root': f'{this_window_nuc_pct_quilt_plot_name}.json' if not args.use_matplotlib and C2PRO_INSTALLED else this_window_nuc_pct_quilt_plot_name, 'save_also_png': save_png, 'sgRNA_intervals': sub_sgRNA_intervals, + 'sgRNA_sequences': consensus_guides, 'quantification_window_idxs': include_idxs, 'custom_colors': custom_config['colors'], } @@ -628,7 +654,7 @@ def main(): crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] - if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff: + if args.base_editor_output and should_plot_large_plots(sub_nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib): this_window_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_conversion_map_around_sgRNA_'+sgRNA) conversion_map_input = { 'nuc_pct_df': sub_nucleotide_percentage_summary_df, @@ -656,7 +682,7 @@ def main(): ] # done with per-sgRNA plots - if not args.suppress_plots and not args.suppress_batch_summary_plots: # plot the whole region + if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib): # plot the whole region this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, @@ -664,6 +690,7 @@ def main(): 'fig_filename_root': f'{this_nuc_pct_quilt_plot_name}.json' if not args.use_matplotlib and C2PRO_INSTALLED else this_nuc_pct_quilt_plot_name, 'save_also_png': save_png, 'sgRNA_intervals': consensus_sgRNA_intervals, + 'sgRNA_sequences': consensus_guides, 'quantification_window_idxs': include_idxs, 'custom_colors': custom_config['colors'], } @@ -679,7 +706,7 @@ def main(): crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = '' crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base for the amplicon ' + amplicon_name crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] - if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff: + if args.base_editor_output and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib): this_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_conversion_map') conversion_map_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, @@ -706,7 +733,7 @@ def main(): crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] else: # guides are not the same - if not args.suppress_plots and not args.suppress_batch_summary_plots: + if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib): this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, @@ -724,7 +751,7 @@ def main(): nuc_pct_quilt_plot_names.append(plot_name) crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base for the amplicon ' + amplicon_name crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] - if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff: + if args.base_editor_output and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib): this_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt') conversion_map_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, @@ -745,7 +772,7 @@ def main(): crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] # allele modification frequency heatmap and line plots - if C2PRO_INSTALLED and not args.use_matplotlib and not args.suppress_plots and not args.suppress_batch_summary_plots and (nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff: + if C2PRO_INSTALLED and not args.use_matplotlib and not args.suppress_plots and not args.suppress_batch_summary_plots: if guides_all_same: sgRNA_intervals = [consensus_sgRNA_intervals] * modification_frequency_summary_df.shape[0] else: diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index a3a57b9f..10da6787 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -3785,6 +3785,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'sgRNA_intervals': sgRNA_intervals, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'sgRNA_sequences': sgRNA_sequences, 'quantification_window_idxs': include_idxs_list, 'custom_colors': custom_config["colors"], } @@ -3833,6 +3834,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'sgRNA_intervals': new_sgRNA_intervals, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'sgRNA_sequences': [sgRNA], 'quantification_window_idxs': new_include_idx, 'custom_colors': custom_config["colors"], } @@ -4184,6 +4186,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ sgRNA_intervals = refs[ref_names_for_hdr[0]]['sgRNA_intervals'] sgRNA_names = refs[ref_names_for_hdr[0]]['sgRNA_names'] sgRNA_mismatches = refs[ref_names_for_hdr[0]]['sgRNA_mismatches'] + sgRNA_sequences = refs[ref_names_for_hdr[0]]['sgRNA_sequences'] # include_idxs_list = refs[ref_names_for_hdr[0]]['include_idxs'] include_idxs_list = [] # the quantification windows may be different between different amplicons @@ -4204,6 +4207,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'quantification_window_idxs': include_idxs_list, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'sgRNA_sequences': sgRNA_sequences, 'custom_colors': custom_config["colors"], } debug('Plotting HDR nucleotide quilt') @@ -4789,6 +4793,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): sgRNA_intervals = refs[ref_names[0]]['sgRNA_intervals'] sgRNA_names = refs[ref_names[0]]['sgRNA_names'] sgRNA_mismatches = refs[ref_names[0]]['sgRNA_mismatches'] + sgRNA_sequences = refs[ref_names[0]]['sgRNA_sequences'] include_idxs_list = refs[ref_names[0]]['include_idxs'] plot_root = _jp('11a.Prime_editing_nucleotide_percentage_quilt') @@ -4801,6 +4806,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): 'sgRNA_intervals': sgRNA_intervals, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'sgRNA_sequences': sgRNA_sequences, 'quantification_window_idxs': include_idxs_list, 'custom_colors': custom_config['colors'] } @@ -4861,6 +4867,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): 'sgRNA_intervals': new_sgRNA_intervals, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'sgRNA_sequences': [sgRNA], 'quantification_window_idxs': new_include_idx, 'custom_colors': custom_config['colors'] } diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html index 59bd6f5b..3b030fcc 100644 --- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -58,7 +58,7 @@
{{report_name}}
-
+
{% for run_name in run_names %} {{run_name}} {% endfor %} diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html index 015dda0f..aab88602 100644 --- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html @@ -9,7 +9,7 @@
Failed Runs
-
+
{% for failed_run in failed_runs %} {# Toggle the description visibility on click #} Date: Fri, 10 May 2024 14:12:57 -0600 Subject: [PATCH 05/16] Push new releases to ECR (#74) * Create aws_ecr.yml (#1) * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * us-east-1 * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml * Update aws_ecr.yml --- .github/workflows/aws_ecr.yml | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/aws_ecr.yml diff --git a/.github/workflows/aws_ecr.yml b/.github/workflows/aws_ecr.yml new file mode 100644 index 00000000..ea4e7c98 --- /dev/null +++ b/.github/workflows/aws_ecr.yml @@ -0,0 +1,47 @@ +name: Push Docker image to Amazon ECR + +on: + release: + types: + - edited + - released + branches: + - master + +jobs: + build-and-push: + name: Build and Push Docker image + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - id: get_version + name: Get version + uses: jannemattila/get-version-from-tag@v3 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v1 + + - name: Build, tag, and push the image to Amazon ECR + id: build-image + env: + AWS_ACCOUNT: ${{ secrets.AWS_ACCOUNT_ID }} + ECR_REPOSITORY: 'crispresso2' + AWS_REGION: 'us-east-1' + IMAGE_TAG: ${{ steps.get_version.outputs.version }} + run: | + # Build a docker container and push it to ECR + docker build -t $AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG . + echo "Pushing image to ECR..." + docker push $AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG + echo "::set-output name=image::$AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG" From 94a1052570e1cb586c30a24489f9a3cec20d47fb Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 10 May 2024 16:27:46 -0600 Subject: [PATCH 06/16] Pass div id for plotly --- CRISPResso2/CRISPRessoAggregateCORE.py | 8 ++++++++ CRISPResso2/CRISPRessoBatchCORE.py | 10 ++++++++-- CRISPResso2/CRISPRessoReports/CRISPRessoReport.py | 12 ++++++++++++ .../CRISPRessoReports/templates/batchReport.html | 12 ++++++------ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index 59d8897f..7e5b9b0d 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -614,12 +614,14 @@ def main(): crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_titles'] = {} crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_labels'] = {} crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas'] = {} + crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_names'] = [] crispresso2_info['results']['general_plots']['allele_modification_line_plot_paths'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_titles'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {} + crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'] = {} if guides_all_same: sgRNA_intervals = [consensus_sgRNA_intervals] * modification_frequency_summary_df.shape[0] else: @@ -645,11 +647,13 @@ def main(): plot_name = 'CRISPRessoAggregate_percentage_of_{0}_across_alleles_{1}_heatmap'.format(modification_type.lower(), amplicon_name) plot_path = '{0}.html'.format(_jp(plot_name)) + heatmap_div_id = '{0}-allele-modification-heatmap-{1}'.format(amplicon_name.lower(), modification_type.lower()) allele_modification_heatmap_input = { 'sample_values': modification_df, 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'div_id': heatmap_div_id, } plot( CRISPRessoPlot.plot_allele_modification_heatmap, @@ -671,15 +675,18 @@ def main(): ), ), ] + crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'][plot_name] = heatmap_div_id plot_name = 'CRISPRessoAggregate_percentage_of_{0}_across_alleles_{1}_line'.format(modification_type.lower(), amplicon_name) plot_path = '{0}.html'.format(_jp(plot_name)) + line_div_id = '{0}-allele-modification-line-{1}'.format(amplicon_name.lower(), modification_type.lower()) allele_modification_line_input = { 'sample_values': modification_df, 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'div_id': line_div_id, } plot( CRISPRessoPlot.plot_allele_modification_line, @@ -700,6 +707,7 @@ def main(): ), ), ] + crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'][plot_name] = line_div_id crispresso2_info['results']['general_plots']['window_nuc_pct_quilt_plot_names'] = window_nuc_pct_quilt_plot_names crispresso2_info['results']['general_plots']['nuc_pct_quilt_plot_names'] = nuc_pct_quilt_plot_names diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 4e387f98..dffd8fa5 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -388,12 +388,14 @@ def main(): crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_titles'] = {} crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_labels'] = {} crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas'] = {} + crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_names'] = [] crispresso2_info['results']['general_plots']['allele_modification_line_plot_paths'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_titles'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {} crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {} + crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'] = {} large_plot_cutoff = 300 @@ -771,12 +773,13 @@ def main(): plot_name = 'CRISPRessoBatch_percentage_of_{0}_across_alleles_{1}_heatmap'.format(modification_type.lower(), amplicon_name) plot_path = '{0}.html'.format(_jp(plot_name)) + heatmap_div_id = '{0}-allele-modification-heatmap-{1}'.format(amplicon_name.lower(), modification_type.lower()) allele_modification_heatmap_input = { 'sample_values': modification_df, 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, - 'amplicon_name': amplicon_name, + 'div_id': heatmap_div_id, } debug('Plotting allele modification heatmap for {0}'.format(amplicon_name)) plot( @@ -799,16 +802,18 @@ def main(): ), ), ] + crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'][plot_name] = heatmap_div_id plot_name = 'CRISPRessoBatch_percentage_of_{0}_across_alleles_{1}_line'.format(modification_type.lower(), amplicon_name) plot_path = '{0}.html'.format(_jp(plot_name)) + line_div_id = '{0}-allele-modification-line-{1}'.format(amplicon_name.lower(), modification_type.lower()) allele_modification_line_input = { 'sample_values': modification_df, 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, - 'amplicon_name': amplicon_name, + 'div_id': line_div_id, } debug('Plotting allele modification line plot for {0}'.format(amplicon_name)) plot( @@ -831,6 +836,7 @@ def main(): ), ), ] + crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'][plot_name] = line_div_id #end if amp_found_count > 0 (how many folders had information for this amplicon) #end per-amplicon analysis diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py index 8e86084e..96841171 100644 --- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py +++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py @@ -261,6 +261,10 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, allele_modification_heatmap_plot['datas'] = crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas'] else: allele_modification_heatmap_plot['datas'] = {} + if 'allele_modification_heatmap_plot_divs' in crispresso2_info['results']['general_plots']: + allele_modification_heatmap_plot['divs'] = crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'] + else: + allele_modification_heatmap_plot['divs'] = {} allele_modification_line_plot = {} if 'allele_modification_line_plot_names' in crispresso2_info['results']['general_plots']: @@ -283,6 +287,10 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, allele_modification_line_plot['datas'] = crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] else: allele_modification_line_plot['datas'] = {} + if 'allele_modification_line_plot_divs' in crispresso2_info['results']['general_plots']: + allele_modification_line_plot['divs'] = crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'] + else: + allele_modification_line_plot['divs'] = {} allele_modification_heatmap_plot['htmls'] = {} for heatmap_plot_name, heatmap_plot_path in allele_modification_heatmap_plot['paths'].items(): @@ -572,6 +580,7 @@ def fill_default(dictionary, key, default_type=list): ('titles', list), ('labels', dict), ('datas', dict), + ('divs', dict) ] for dictionary in dictionaries: for key, default_type in keys_and_default_types: @@ -590,6 +599,7 @@ def fill_default(dictionary, key, default_type=list): } for html in sub_html_files: sub_html_files[html] = crispresso_data_path + sub_html_files[html] + breakpoint() with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile: outfile.write(render_template( template, @@ -618,11 +628,13 @@ def fill_default(dictionary, key, default_type=list): allele_modification_heatmap_plot_titles=allele_modification_heatmap_plot['titles'], allele_modification_heatmap_plot_labels=allele_modification_heatmap_plot['labels'], allele_modification_heatmap_plot_datas=allele_modification_heatmap_plot['datas'], + allele_modification_heatmap_plot_divs=allele_modification_heatmap_plot['divs'], allele_modification_line_plot_names=allele_modification_line_plot['names'], allele_modification_line_plot_htmls=allele_modification_line_plot['htmls'], allele_modification_line_plot_titles=allele_modification_line_plot['titles'], allele_modification_line_plot_labels=allele_modification_line_plot['labels'], allele_modification_line_plot_datas=allele_modification_line_plot['datas'], + allele_modification_line_plot_divs=allele_modification_line_plot['divs'], C2PRO_INSTALLED=C2PRO_INSTALLED, )) diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html index 59bd6f5b..8909443d 100644 --- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -147,10 +147,10 @@
{{report_data['titles'][plot_name]}}
{{allele_modification_heatmap_plot_titles[heatmap_plot_name]}}
@@ -175,13 +175,13 @@
{{allele_modification_heatmap_plot_titles[heatmap_plot_name]}}