From 0226548fdf224a428ea28f5cb775c1a04df547e1 Mon Sep 17 00:00:00 2001
From: Samuel Nichols <Snic9004@gmail.com>
Date: Mon, 6 May 2024 16:08:00 -0600
Subject: [PATCH 01/16] Sam/try plots (#71)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as a data file in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Point test to try-plots

* Fix d3 not showing and plotly mixing with matplotlib

* Use logger for warnings and debug statements

* Point tests back at master

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>
---
 CRISPResso2/CRISPRessoAggregateCORE.py           |  7 ++++++-
 CRISPResso2/CRISPRessoBatchCORE.py               |  7 ++++++-
 CRISPResso2/CRISPRessoCORE.py                    | 15 ++++++++++-----
 CRISPResso2/CRISPRessoMultiProcessing.py         | 15 +++++++++++----
 .../CRISPRessoReports/CRISPRessoReport.py        |  1 -
 .../templates/shared/partials/fig_reports.html   | 16 +++++++++-------
 6 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
index 5cd4cba9..59d8897f 100644
--- a/CRISPResso2/CRISPRessoAggregateCORE.py
+++ b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -869,7 +869,12 @@ def main():
                 debug('Plot pool results:')
                 for future in process_futures:
                     debug('future: ' + str(future))
-            future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
+            for future in process_futures:
+                try:
+                    future.result()
+                except Exception as e:
+                    logger.warning('Error in plot pool: %s' % e)
+                    logger.debug(traceback.format_exc())
             process_pool.shutdown()
 
         info('Analysis Complete!', {'percent_complete': 100})
diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index 4c60e2da..4e387f98 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -904,7 +904,12 @@ def main():
                 debug('CRISPResso batch results:')
                 for future in process_futures:
                     debug('future: ' + str(future))
-            future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
+            for future in process_futures:
+                try:
+                    future.result()
+                except Exception as e:
+                    logger.warning('Error in plot pool: %s' % e)
+                    logger.debug(traceback.format_exc())
             process_pool.shutdown()
 
         if not args.suppress_report:
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index a1590604..a3a57b9f 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -3776,11 +3776,11 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                     mod_df_for_plot.insert(0, 'Batch', ref_name)
 
                     plot_root = _jp('2a.'+ ref_plot_name + 'Nucleotide_percentage_quilt')
-                    pro_output_name = f'plot_{os.path.basename(plot_root)}.json'
+                    pro_output_name = os.path.join(OUTPUT_DIRECTORY, f'plot_{os.path.basename(plot_root)}.json')
                     plot_2a_input = {
                         'nuc_pct_df': nuc_df_for_plot,
                         'mod_pct_df': mod_df_for_plot,
-                        'fig_filename_root': f'{_jp(pro_output_name)}' if not args.use_matplotlib and C2PRO_INSTALLED else plot_root,
+                        'fig_filename_root': pro_output_name if not args.use_matplotlib and C2PRO_INSTALLED else plot_root,
                         'save_also_png': save_png,
                         'sgRNA_intervals': sgRNA_intervals,
                         'sgRNA_names': sgRNA_names,
@@ -3824,11 +3824,11 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                         for x in include_idxs_list:
                             new_include_idx += [x - new_sel_cols_start]
                         plot_root = _jp('2b.'+ ref_plot_name + 'Nucleotide_percentage_quilt_around_' + sgRNA_label)
-                        pro_output_name = f'plot_{os.path.basename(plot_root)}.json'
+                        pro_output_name = os.path.join(OUTPUT_DIRECTORY, f'plot_{os.path.basename(plot_root)}.json')
                         plot_2b_input = {
                             'nuc_pct_df': nuc_df_for_plot.iloc[:, sel_cols],
                             'mod_pct_df': mod_df_for_plot.iloc[:, sel_cols],
-                            'fig_filename_root': f'{_jp(pro_output_name)}' if not args.use_matplotlib and C2PRO_INSTALLED else plot_root,
+                            'fig_filename_root': pro_output_name if not args.use_matplotlib and C2PRO_INSTALLED else plot_root,
                             'save_also_png': save_png,
                             'sgRNA_intervals': new_sgRNA_intervals,
                             'sgRNA_names': sgRNA_names,
@@ -4893,7 +4893,12 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                 debug('Plot pool results:')
                 for future in process_futures:
                     debug('future: ' + str(future))
-            future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
+            for future in process_futures:
+                try:
+                    future.result()
+                except Exception as e:
+                    logger.warning('Error in plot pool: %s' % e)
+                    logger.debug(traceback.format_exc())
             process_pool.shutdown()
 
         info('Done!')
diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index d9afb3d1..0ea1f813 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -14,6 +14,7 @@
 from inspect import getmodule, stack
 import numpy as np
 import pandas as pd
+import traceback
 
 def get_max_processes():
     return mp.cpu_count()
@@ -271,7 +272,13 @@ def run_plot(plot_func, plot_args, num_processes, process_futures, process_pool)
     -------
     None
     """
-    if num_processes > 1:
-        process_futures[process_pool.submit(plot_func, **plot_args)] = (plot_func, plot_args)
-    else:
-        plot_func(**plot_args)
+    logger = logging.getLogger(getmodule(stack()[1][0]).__name__)
+    try:
+        if num_processes > 1:
+            process_futures[process_pool.submit(plot_func, **plot_args)] = (plot_func, plot_args)
+        else:
+            plot_func(**plot_args)
+    except Exception as e:
+        logger.warn(f"Plot error {e}, skipping plot \n")
+        logger.debug(traceback.format_exc())
+
diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
index fde5fc1d..8e86084e 100644
--- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
+++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
@@ -588,7 +588,6 @@ def fill_default(dictionary, key, default_type=list):
             'datas': [],
             'htmls': [],
         }
-
     for html in sub_html_files:
         sub_html_files[html] = crispresso_data_path + sub_html_files[html]
     with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile:
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html
index e21c6853..b30c6cdc 100644
--- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html
+++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html
@@ -13,7 +13,7 @@
     {%- if amplicon_name is defined -%}
         {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'][amplicon_name] -%}
             {{report_data['figures']['htmls'][amplicon_name][fig_name]|safe}}
-        {%- else -%}
+        {%- elif fig_name in report_data['figures']['locs'][amplicon_name] -%}
             <a href="{{report_data['crispresso_data_path']}}{{report_data['figures']['locs'][amplicon_name][fig_name]}}.pdf"><img src="{{report_data['crispresso_data_path']}}{{report_data['figures']['locs'][amplicon_name][fig_name]}}.png" width='{{width}}'></a>
         {% endif -%}
         <label class="labelpadding">{{report_data['figures']['captions'][amplicon_name][fig_name]}}</label>
@@ -23,12 +23,14 @@
     {%- else %}
         {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'] -%}
             {{report_data['figures']['htmls'][fig_name]|safe}}
-        {%- else -%}
+        {%- elif fig_name in report_data['figures']['locs'] -%}
             <a href="{{report_data['crispresso_data_path']}}{{report_data['figures']['locs'][fig_name]}}.pdf"><img src="{{report_data['crispresso_data_path']}}{{report_data['figures']['locs'][fig_name]}}.png" width='{{width}}'></a>
         {% endif -%}
-        <label class="labelpadding">{{report_data['figures']['captions'][fig_name]}}</label>
-        {%- for (data_label,data_path) in report_data['figures']['datas'][fig_name] %}
-            <p class="m-0"><small>Data: <a href="{{report_data['crispresso_data_path']}}{{data_path}}">{{data_label}}</a></small></p>
-        {%- endfor -%}
+        {% if fig_name in report_data['figures']['captions'] and fig_name in report_data['figures']['datas'] %}
+            <label class="labelpadding">{{report_data['figures']['captions'][fig_name]}}</label>
+            {%- for (data_label,data_path) in report_data['figures']['datas'][fig_name] %}
+                <p class="m-0"><small>Data: <a href="{{report_data['crispresso_data_path']}}{{data_path}}">{{data_label}}</a></small></p>
+            {%- endfor -%}
+        {%- endif %}
     {%- endif %}
-</div>
+</div>
\ No newline at end of file

From 3513a6c67caea631b0a5094dbe52f29ae6384aa7 Mon Sep 17 00:00:00 2001
From: Samuel Nichols <Snic9004@gmail.com>
Date: Mon, 6 May 2024 16:16:33 -0600
Subject: [PATCH 02/16] Sam/fix plots (#72)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as a data file in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Fix d3 not showing and plotly mixing with matplotlib

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

From d88094268257eac459aa14e9eeceb5d13ec05909 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Tue, 7 May 2024 10:22:11 -0600
Subject: [PATCH 03/16] Remove token from integration tests file

---
 .github/workflows/integration_tests.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index 7d968708..92a11f39 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -45,7 +45,6 @@ jobs:
       uses: actions/checkout@v3
       with:
         repository: edilytics/CRISPResso2_tests
-        token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }}
         # ref: '<BRANCH-NAME>' # Use this to specify a branch other than master
 
     - name: Run Basic

From 64ef72e0b6c92ce8b05f1b7da3706c14ba38dcd8 Mon Sep 17 00:00:00 2001
From: Trevor Martin <60452953+trevormartinj7@users.noreply.github.com>
Date: Thu, 9 May 2024 15:08:42 -0600
Subject: [PATCH 04/16] Passing sgRNA sequences to regular and Batch D3 plots
 (#73)

* Sam/try plots (#71)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as a data file in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Point test to try-plots

* Fix d3 not showing and plotly mixing with matplotlib

* Use logger for warnings and debug statements

* Point tests back at master

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

* Sam/fix plots (#72)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as a data file in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Fix d3 not showing and plotly mixing with matplotlib

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

* Remove token from integration tests file

* Provide sgRNA_sequences to plot_nucleotide_quilt plots

* Passing sgRNA_sequences to plot

* Refactor check for determining when to use CRISPRessoPro or matplotlib for Batch plots

* Add max-height to Batch report samples

* Change testing branch

* Fix wrong check for large Batch plots

* Update integration_tests.yml to point back at master

---------

Co-authored-by: Samuel Nichols <Snic9004@gmail.com>
Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>
---
 CRISPResso2/CRISPRessoBatchCORE.py            | 45 +++++++++++++++----
 CRISPResso2/CRISPRessoCORE.py                 |  7 +++
 .../templates/batchReport.html                |  2 +-
 .../shared/partials/failed_runs.html          |  2 +-
 tests/unit_tests/test_CRISPRessoBatchCORE.py  | 44 ++++++++++++++++++
 5 files changed, 89 insertions(+), 11 deletions(-)
 create mode 100644 tests/unit_tests/test_CRISPRessoBatchCORE.py

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index 4e387f98..f6ca5550 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -49,6 +49,33 @@ def check_library(library_name):
 np = check_library('numpy')
 
 
+def should_plot_large_plots(num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff=300):
+    """Determine if large plots should be plotted.
+
+    Parameters
+    ----------
+    num_rows : int
+        Number of rows in the dataframe.
+    c2pro_installed : bool
+        Whether CRISPRessoPro is installed.
+    use_matplotlib : bool
+        Whether to use matplotlib when CRISPRessoPro is installed, i.e. value
+        of `--use_matplotlib`.
+    large_plot_cutoff : int, optional
+        Number of samples at which to not plot large plots with matplotlib.
+        Note that each sample has 6 rows in the dataframe. Defaults to 300.
+
+    Returns
+    -------
+    bool
+        Whether to plot large plots.
+    """
+    return (
+        (not use_matplotlib and c2pro_installed)
+        or (num_rows / 6) < large_plot_cutoff
+    )
+
+
 def main():
     try:
         start_time =  datetime.now()
@@ -395,8 +422,6 @@ def main():
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {}
 
-        large_plot_cutoff = 300
-
         percent_complete_start, percent_complete_end = 90, 99
         if all_amplicons:
             percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
@@ -580,7 +605,7 @@ def main():
                         sub_modification_percentage_summary_filename = _jp(amplicon_plot_name + 'Modification_percentage_summary_around_sgRNA_'+sgRNA+'.txt')
                         sub_modification_percentage_summary_df.to_csv(sub_modification_percentage_summary_filename, sep='\t', index=None)
 
-                        if not args.suppress_plots and not args.suppress_batch_summary_plots and (nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff:
+                        if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(sub_nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib):
                             # plot for each guide
                             # show all sgRNA's on the plot
                             sub_sgRNA_intervals = []
@@ -614,6 +639,7 @@ def main():
                                 'fig_filename_root': f'{this_window_nuc_pct_quilt_plot_name}.json' if not args.use_matplotlib and C2PRO_INSTALLED else this_window_nuc_pct_quilt_plot_name,
                                 'save_also_png': save_png,
                                 'sgRNA_intervals': sub_sgRNA_intervals,
+                                'sgRNA_sequences': consensus_guides,
                                 'quantification_window_idxs': include_idxs,
                                 'custom_colors': custom_config['colors'],
                             }
@@ -628,7 +654,7 @@ def main():
                             crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name
                             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
 
-                            if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff:
+                            if args.base_editor_output and should_plot_large_plots(sub_nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib):
                                 this_window_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_conversion_map_around_sgRNA_'+sgRNA)
                                 conversion_map_input = {
                                     'nuc_pct_df': sub_nucleotide_percentage_summary_df,
@@ -656,7 +682,7 @@ def main():
                                                                                 ]
                         # done with per-sgRNA plots
 
-                    if not args.suppress_plots and not args.suppress_batch_summary_plots:  # plot the whole region
+                    if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib):  # plot the whole region
                         this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
@@ -664,6 +690,7 @@ def main():
                             'fig_filename_root': f'{this_nuc_pct_quilt_plot_name}.json' if not args.use_matplotlib and C2PRO_INSTALLED else this_nuc_pct_quilt_plot_name,
                             'save_also_png': save_png,
                             'sgRNA_intervals': consensus_sgRNA_intervals,
+                            'sgRNA_sequences': consensus_guides,
                             'quantification_window_idxs': include_idxs,
                             'custom_colors': custom_config['colors'],
                         }
@@ -679,7 +706,7 @@ def main():
                             crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = ''
                         crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base for the amplicon ' + amplicon_name
                         crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
-                        if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff:
+                        if args.base_editor_output and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib):
                             this_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_conversion_map')
                             conversion_map_input = {
                                 'nuc_pct_df': nucleotide_percentage_summary_df,
@@ -706,7 +733,7 @@ def main():
                             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
 
                 else:  # guides are not the same
-                    if not args.suppress_plots and not args.suppress_batch_summary_plots:
+                    if not args.suppress_plots and not args.suppress_batch_summary_plots and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], C2PRO_INSTALLED, args.use_matplotlib):
                         this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
@@ -724,7 +751,7 @@ def main():
                         nuc_pct_quilt_plot_names.append(plot_name)
                         crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base for the amplicon ' + amplicon_name
                         crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
-                        if args.base_editor_output and (sub_nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff:
+                        if args.base_editor_output and should_plot_large_plots(nucleotide_percentage_summary_df.shape[0], False, args.use_matplotlib):
                             this_nuc_conv_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
                             conversion_map_input = {
                                 'nuc_pct_df': nucleotide_percentage_summary_df,
@@ -745,7 +772,7 @@ def main():
                             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
 
                 # allele modification frequency heatmap and line plots
-                if C2PRO_INSTALLED and not args.use_matplotlib and not args.suppress_plots and not args.suppress_batch_summary_plots and (nucleotide_percentage_summary_df.shape[0] / 6) < large_plot_cutoff:
+                if C2PRO_INSTALLED and not args.use_matplotlib and not args.suppress_plots and not args.suppress_batch_summary_plots:
                     if guides_all_same:
                         sgRNA_intervals = [consensus_sgRNA_intervals] * modification_frequency_summary_df.shape[0]
                     else:
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index a3a57b9f..10da6787 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -3785,6 +3785,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                         'sgRNA_intervals': sgRNA_intervals,
                         'sgRNA_names': sgRNA_names,
                         'sgRNA_mismatches': sgRNA_mismatches,
+                        'sgRNA_sequences': sgRNA_sequences,
                         'quantification_window_idxs': include_idxs_list,
                         'custom_colors': custom_config["colors"],
                     }
@@ -3833,6 +3834,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                             'sgRNA_intervals': new_sgRNA_intervals,
                             'sgRNA_names': sgRNA_names,
                             'sgRNA_mismatches': sgRNA_mismatches,
+                            'sgRNA_sequences': [sgRNA],
                             'quantification_window_idxs': new_include_idx,
                             'custom_colors': custom_config["colors"],
                         }
@@ -4184,6 +4186,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                     sgRNA_intervals = refs[ref_names_for_hdr[0]]['sgRNA_intervals']
                     sgRNA_names = refs[ref_names_for_hdr[0]]['sgRNA_names']
                     sgRNA_mismatches = refs[ref_names_for_hdr[0]]['sgRNA_mismatches']
+                    sgRNA_sequences = refs[ref_names_for_hdr[0]]['sgRNA_sequences']
 #                    include_idxs_list = refs[ref_names_for_hdr[0]]['include_idxs']
                     include_idxs_list = [] # the quantification windows may be different between different amplicons
 
@@ -4204,6 +4207,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                         'quantification_window_idxs': include_idxs_list,
                         'sgRNA_names': sgRNA_names,
                         'sgRNA_mismatches': sgRNA_mismatches,
+                        'sgRNA_sequences': sgRNA_sequences,
                         'custom_colors': custom_config["colors"],
                     }
                     debug('Plotting HDR nucleotide quilt')
@@ -4789,6 +4793,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                 sgRNA_intervals = refs[ref_names[0]]['sgRNA_intervals']
                 sgRNA_names = refs[ref_names[0]]['sgRNA_names']
                 sgRNA_mismatches = refs[ref_names[0]]['sgRNA_mismatches']
+                sgRNA_sequences = refs[ref_names[0]]['sgRNA_sequences']
                 include_idxs_list = refs[ref_names[0]]['include_idxs']
 
                 plot_root = _jp('11a.Prime_editing_nucleotide_percentage_quilt')
@@ -4801,6 +4806,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                     'sgRNA_intervals': sgRNA_intervals,
                     'sgRNA_names': sgRNA_names,
                     'sgRNA_mismatches': sgRNA_mismatches,
+                    'sgRNA_sequences': sgRNA_sequences,
                     'quantification_window_idxs': include_idxs_list,
                     'custom_colors': custom_config['colors']
                 }
@@ -4861,6 +4867,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                         'sgRNA_intervals': new_sgRNA_intervals,
                         'sgRNA_names': sgRNA_names,
                         'sgRNA_mismatches': sgRNA_mismatches,
+                        'sgRNA_sequences': [sgRNA],
                         'quantification_window_idxs': new_include_idx,
                         'custom_colors': custom_config['colors']
                     }
diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
index 59bd6f5b..3b030fcc 100644
--- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html
+++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
@@ -58,7 +58,7 @@
                 <h5 id="CRISPResso2_Batch_Output">{{report_name}}</h5>
               </div>
               <div class='card-body p-0'>
-                <div class="list-group list-group-flush">
+                <div class="list-group list-group-flush" style="max-height: 25vh; overflow-y: scroll;">
               {% for run_name in run_names %}
 	      <a href="{{sub_html_files[run_name]}}" class="list-group-item list-group-item-action" id="{{run_name}}">{{run_name}}</a>
                 {% endfor %}
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html
index 015dda0f..aab88602 100644
--- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html
+++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html
@@ -9,7 +9,7 @@ <h5 id="failed_runs" class="mb-0 text-white">Failed Runs</h5>
     </div>
 
     <div class='card-body p-0'>
-      <div class="list-group list-group-flush">
+      <div class="list-group list-group-flush" style="max-height: 25vh; overflow-y: scroll;">
         {% for failed_run in failed_runs %}
         {# Toggle the description visibility on click #}
         <a href="javascript:void(0)" class="list-group-item list-group-item-action failed-run-name bg-light text-dark"
diff --git a/tests/unit_tests/test_CRISPRessoBatchCORE.py b/tests/unit_tests/test_CRISPRessoBatchCORE.py
new file mode 100644
index 00000000..ab57b4e0
--- /dev/null
+++ b/tests/unit_tests/test_CRISPRessoBatchCORE.py
@@ -0,0 +1,44 @@
+from CRISPResso2 import CRISPRessoBatchCORE
+
+
+
+def test_should_plot_large_plots():
+    """60 rows against a cutoff of 300, with c2pro_installed and use_matplotlib both False."""
+    # Positional order: num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff.
+    plot_args = (60, False, False, 300)
+    outcome = CRISPRessoBatchCORE.should_plot_large_plots(*plot_args)
+    assert outcome
+
+
+def test_should_plot_large_plots_c2pro_installed_use_matplotlib_small():
+    """60 rows against a cutoff of 300, with c2pro_installed and use_matplotlib both True."""
+    # Positional order: num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff.
+    plot_args = (60, True, True, 300)
+    outcome = CRISPRessoBatchCORE.should_plot_large_plots(*plot_args)
+    assert outcome
+
+
+def test_should_plot_large_plots_c2pro_installed():
+    """6000 rows against a cutoff of 300, with c2pro_installed True and use_matplotlib False."""
+    # Positional order: num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff.
+    plot_args = (6000, True, False, 300)
+    outcome = CRISPRessoBatchCORE.should_plot_large_plots(*plot_args)
+    assert outcome
+
+
+def test_should_plot_large_plots_c2pro_installed_use_matplotlib_large():
+    """6000 rows against a cutoff of 300, with c2pro_installed and use_matplotlib both True."""
+    # Positional order: num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff.
+    plot_args = (6000, True, True, 300)
+    outcome = CRISPRessoBatchCORE.should_plot_large_plots(*plot_args)
+    assert not outcome
+
+
+def test_should_plot_large_plots_c2pro_not_installed_use_matplotlib():
+    """6000 rows against a cutoff of 300, with c2pro_installed False and use_matplotlib True."""
+    # Positional order: num_rows, c2pro_installed, use_matplotlib, large_plot_cutoff.
+    plot_args = (6000, False, True, 300)
+    outcome = CRISPRessoBatchCORE.should_plot_large_plots(*plot_args)
+    assert not outcome
+
+

From fa5a18d8bbffe5983bbbed0e42f539703b90e44d Mon Sep 17 00:00:00 2001
From: Samuel Nichols <Snic9004@gmail.com>
Date: Fri, 10 May 2024 14:12:57 -0600
Subject: [PATCH 05/16] Push new releases to ECR (#74)

* Create aws_ecr.yml (#1)

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* us-east-1

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml

* Update aws_ecr.yml
---
 .github/workflows/aws_ecr.yml | 47 +++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 .github/workflows/aws_ecr.yml

diff --git a/.github/workflows/aws_ecr.yml b/.github/workflows/aws_ecr.yml
new file mode 100644
index 00000000..ea4e7c98
--- /dev/null
+++ b/.github/workflows/aws_ecr.yml
@@ -0,0 +1,47 @@
+name: Push Docker image to Amazon ECR
+
+on:
+  release:
+    types:
+      - edited
+      - released
+    branches:  # NOTE(review): confirm the release trigger honors a branches filter
+      - master
+
+jobs:
+  build-and-push:
+    name: Build and Push Docker image
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - id: get_version  # its `version` output is used as IMAGE_TAG below
+      name: Get version
+      uses: jannemattila/get-version-from-tag@v3
+
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v1
+      with:
+        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+        aws-region: us-east-1
+
+    - name: Login to Amazon ECR
+      id: login-ecr
+      uses: aws-actions/amazon-ecr-login@v1
+
+    - name: Build, tag, and push the image to Amazon ECR
+      id: build-image
+      env:
+        AWS_ACCOUNT: ${{ secrets.AWS_ACCOUNT_ID }}
+        ECR_REPOSITORY: 'crispresso2'
+        AWS_REGION: 'us-east-1'
+        IMAGE_TAG: ${{ steps.get_version.outputs.version }}
+      run: |
+        # Build the Docker image, push it to ECR, and expose its URI as the `image` step output
+        docker build -t $AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG .
+        echo "Pushing image to ECR..."
+        docker push $AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG
+        echo "image=$AWS_ACCOUNT.dkr.ecr.$AWS_REGION.amazonaws.com/$ECR_REPOSITORY:$IMAGE_TAG" >> "$GITHUB_OUTPUT"

From 94a1052570e1cb586c30a24489f9a3cec20d47fb Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Fri, 10 May 2024 16:27:46 -0600
Subject: [PATCH 06/16] Pass div id for plotly

---
 CRISPResso2/CRISPRessoAggregateCORE.py               |  8 ++++++++
 CRISPResso2/CRISPRessoBatchCORE.py                   | 10 ++++++++--
 CRISPResso2/CRISPRessoReports/CRISPRessoReport.py    | 12 ++++++++++++
 .../CRISPRessoReports/templates/batchReport.html     | 12 ++++++------
 4 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
index 59d8897f..7e5b9b0d 100644
--- a/CRISPResso2/CRISPRessoAggregateCORE.py
+++ b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -614,12 +614,14 @@ def main():
                         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_titles'] = {}
                         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_labels'] = {}
                         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas'] = {}
+                        crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'] = {}
 
                         crispresso2_info['results']['general_plots']['allele_modification_line_plot_names'] = []
                         crispresso2_info['results']['general_plots']['allele_modification_line_plot_paths'] = {}
                         crispresso2_info['results']['general_plots']['allele_modification_line_plot_titles'] = {}
                         crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {}
                         crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {}
+                        crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'] = {}
                         if guides_all_same:
                             sgRNA_intervals = [consensus_sgRNA_intervals] * modification_frequency_summary_df.shape[0]
                         else:
@@ -645,11 +647,13 @@ def main():
                             plot_name = 'CRISPRessoAggregate_percentage_of_{0}_across_alleles_{1}_heatmap'.format(modification_type.lower(), amplicon_name)
                             plot_path = '{0}.html'.format(_jp(plot_name))
 
+                            heatmap_div_id = '{0}-allele-modification-heatmap-{1}'.format(amplicon_name.lower(), modification_type.lower())
                             allele_modification_heatmap_input = {
                                 'sample_values': modification_df,
                                 'sample_sgRNA_intervals': sgRNA_intervals,
                                 'plot_path': plot_path,
                                 'title': modification_type,
+                                'div_id': heatmap_div_id,
                             }
                             plot(
                                 CRISPRessoPlot.plot_allele_modification_heatmap,
@@ -671,15 +675,18 @@ def main():
                                     ),
                                 ),
                             ]
+                            crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'][plot_name] = heatmap_div_id
 
                             plot_name = 'CRISPRessoAggregate_percentage_of_{0}_across_alleles_{1}_line'.format(modification_type.lower(), amplicon_name)
                             plot_path = '{0}.html'.format(_jp(plot_name))
 
+                            line_div_id = '{0}-allele-modification-line-{1}'.format(amplicon_name.lower(), modification_type.lower())
                             allele_modification_line_input = {
                                 'sample_values': modification_df,
                                 'sample_sgRNA_intervals': sgRNA_intervals,
                                 'plot_path': plot_path,
                                 'title': modification_type,
+                                'div_id': line_div_id,
                             }
                             plot(
                                 CRISPRessoPlot.plot_allele_modification_line,
@@ -700,6 +707,7 @@ def main():
                                     ),
                                 ),
                             ]
+                            crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'][plot_name] = line_div_id
 
             crispresso2_info['results']['general_plots']['window_nuc_pct_quilt_plot_names'] = window_nuc_pct_quilt_plot_names
             crispresso2_info['results']['general_plots']['nuc_pct_quilt_plot_names'] = nuc_pct_quilt_plot_names
diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index 4e387f98..dffd8fa5 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -388,12 +388,14 @@ def main():
         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_titles'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_labels'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas'] = {}
+        crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'] = {}
 
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_names'] = []
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_paths'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_titles'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_labels'] = {}
         crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas'] = {}
+        crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'] = {}
 
         large_plot_cutoff = 300
 
@@ -771,12 +773,13 @@ def main():
                         plot_name = 'CRISPRessoBatch_percentage_of_{0}_across_alleles_{1}_heatmap'.format(modification_type.lower(), amplicon_name)
                         plot_path = '{0}.html'.format(_jp(plot_name))
 
+                        heatmap_div_id = '{0}-allele-modification-heatmap-{1}'.format(amplicon_name.lower(), modification_type.lower())
                         allele_modification_heatmap_input = {
                             'sample_values': modification_df,
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
-                            'amplicon_name': amplicon_name,
+                            'div_id': heatmap_div_id,
                         }
                         debug('Plotting allele modification heatmap for {0}'.format(amplicon_name))
                         plot(
@@ -799,16 +802,18 @@ def main():
                                 ),
                             ),
                         ]
+                        crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs'][plot_name] = heatmap_div_id
 
                         plot_name = 'CRISPRessoBatch_percentage_of_{0}_across_alleles_{1}_line'.format(modification_type.lower(), amplicon_name)
                         plot_path = '{0}.html'.format(_jp(plot_name))
 
+                        line_div_id = '{0}-allele-modification-line-{1}'.format(amplicon_name.lower(), modification_type.lower())
                         allele_modification_line_input = {
                             'sample_values': modification_df,
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
-                            'amplicon_name': amplicon_name,
+                            'div_id': line_div_id,
                         }
                         debug('Plotting allele modification line plot for {0}'.format(amplicon_name))
                         plot(
@@ -831,6 +836,7 @@ def main():
                                 ),
                             ),
                         ]
+                        crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs'][plot_name] = line_div_id
             #end if amp_found_count > 0 (how many folders had information for this amplicon)
         #end per-amplicon analysis
 
diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
index 8e86084e..96841171 100644
--- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
+++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
@@ -261,6 +261,10 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info,
         allele_modification_heatmap_plot['datas'] = crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_datas']
     else:
         allele_modification_heatmap_plot['datas'] = {}
+    if 'allele_modification_heatmap_plot_divs' in crispresso2_info['results']['general_plots']:
+        allele_modification_heatmap_plot['divs'] = crispresso2_info['results']['general_plots']['allele_modification_heatmap_plot_divs']
+    else:
+        allele_modification_heatmap_plot['divs'] = {}
 
     allele_modification_line_plot = {}
     if 'allele_modification_line_plot_names' in crispresso2_info['results']['general_plots']:
@@ -283,6 +287,10 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info,
         allele_modification_line_plot['datas'] = crispresso2_info['results']['general_plots']['allele_modification_line_plot_datas']
     else:
         allele_modification_line_plot['datas'] = {}
+    if 'allele_modification_line_plot_divs' in crispresso2_info['results']['general_plots']:
+        allele_modification_line_plot['divs'] = crispresso2_info['results']['general_plots']['allele_modification_line_plot_divs']
+    else:
+        allele_modification_line_plot['divs'] = {}
 
     allele_modification_heatmap_plot['htmls'] = {}
     for heatmap_plot_name, heatmap_plot_path in allele_modification_heatmap_plot['paths'].items():
@@ -572,6 +580,7 @@ def fill_default(dictionary, key, default_type=list):
         ('titles', list),
         ('labels', dict),
         ('datas', dict),
+        ('divs', dict)
     ]
     for dictionary in dictionaries:
         for key, default_type in keys_and_default_types:
@@ -590,6 +599,7 @@ def fill_default(dictionary, key, default_type=list):
         }
     for html in sub_html_files:
         sub_html_files[html] = crispresso_data_path + sub_html_files[html]
+    breakpoint()
     with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile:
         outfile.write(render_template(
             template,
@@ -618,11 +628,13 @@ def fill_default(dictionary, key, default_type=list):
             allele_modification_heatmap_plot_titles=allele_modification_heatmap_plot['titles'],
             allele_modification_heatmap_plot_labels=allele_modification_heatmap_plot['labels'],
             allele_modification_heatmap_plot_datas=allele_modification_heatmap_plot['datas'],
+            allele_modification_heatmap_plot_divs=allele_modification_heatmap_plot['divs'],
             allele_modification_line_plot_names=allele_modification_line_plot['names'],
             allele_modification_line_plot_htmls=allele_modification_line_plot['htmls'],
             allele_modification_line_plot_titles=allele_modification_line_plot['titles'],
             allele_modification_line_plot_labels=allele_modification_line_plot['labels'],
             allele_modification_line_plot_datas=allele_modification_line_plot['datas'],
+            allele_modification_line_plot_divs=allele_modification_line_plot['divs'],
             C2PRO_INSTALLED=C2PRO_INSTALLED,
         ))
 
diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
index 59bd6f5b..8909443d 100644
--- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html
+++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
@@ -147,10 +147,10 @@ <h5>{{report_data['titles'][plot_name]}}</h5>
                     <h5>{{allele_modification_heatmap_plot_titles[heatmap_plot_name]}}</h5>
                     <ul class="nav nav-tabs justify-content-center card-header-tabs" id="aln-tab" role="tablist">
                         <li class="nav-item" role="presentation">
-                            <a class="nav-link active" data-bs-toggle="tab" id="{{modification_type}}-heatmap-tab" data-bs-target="#{{heatmap_plot_name}}" role="tab" aria-controls="{{heatmap_plot_name}}" aria-selected="true">Heatmap</a>
+                            <a class="nav-link active" data-bs-toggle="tab" id="{{heatmap_plot_name}}-heatmap-tab" data-bs-target="#{{heatmap_plot_name}}" role="tab" aria-controls="{{heatmap_plot_name}}" aria-selected="true">Heatmap</a>
                         </li>
                         <li class="nav-item" role="presentation">
-                            <a class="nav-link" data-bs-toggle="tab" id="{{modification_type}}-line-tab" data-bs-target="#{{line_plot_name}}" role="tab" aria-controls="{{line_plot_name}}" aria-selected="false">Line</a>
+                            <a class="nav-link" data-bs-toggle="tab" id="{{line_plot_name}}-line-tab" data-bs-target="#{{line_plot_name}}" role="tab" aria-controls="{{line_plot_name}}" aria-selected="false">Line</a>
                         </li>
                     </ul>
                 </div>
@@ -175,13 +175,13 @@ <h5>{{allele_modification_heatmap_plot_titles[heatmap_plot_name]}}</h5>
               </div>
               <script type="application/javascript">
                document.addEventListener("DOMContentLoaded", () => {
-                   $("#{{modification_type}}-heatmap-tab").on("shown.bs.tab", (e) => {
-                       let plot = document.getElementById("allele-modification-heatmap-{{modification_type}}");
+                   $("#{{heatmap_plot_name}}-heatmap-tab").on("shown.bs.tab", (e) => {
+                       let plot = document.getElementById("{{ allele_modification_heatmap_plot_divs[heatmap_plot_name] }}" );
                        Plotly.Plots.resize(plot);
                        window.dispatchEvent(new Event("resize"));
                    });
-                   $("#{{modification_type}}-line-tab").on("shown.bs.tab", (e) => {
-                       let plot = document.getElementById("allele-modification-line-{{modification_type}}");
+                   $("#{{line_plot_name}}-line-tab").on("shown.bs.tab", (e) => {
+                       let plot = document.getElementById("{{ allele_modification_line_plot_divs[line_plot_name] }}");
                        Plotly.Plots.resize(plot);
                        window.dispatchEvent(new Event("resize"));
                    });

From 0057b79598385ac1044469865602c3b3eb3c8443 Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Fri, 10 May 2024 16:40:38 -0600
Subject: [PATCH 07/16] Remove debug

---
 CRISPResso2/CRISPRessoReports/CRISPRessoReport.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
index 96841171..eb16730a 100644
--- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
+++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
@@ -599,7 +599,6 @@ def fill_default(dictionary, key, default_type=list):
         }
     for html in sub_html_files:
         sub_html_files[html] = crispresso_data_path + sub_html_files[html]
-    breakpoint()
     with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile:
         outfile.write(render_template(
             template,

From bb0c126c761925d6be234c9858fedc44cfd808c2 Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Mon, 10 Jun 2024 09:45:36 -0600
Subject: [PATCH 08/16] Don't use thread pool with 1 process

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 39 ++++++++++++++++--------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index 0ea1f813..2f00f528 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -91,11 +91,12 @@ def run_crispresso_cmds(crispresso_cmds, n_processes="1", descriptor = 'region',
         int_n_processes = int(n_processes)
 
     logger.info("Running CRISPResso with %d processes" % int_n_processes)
-    pool = mp.Pool(processes=int_n_processes)
+    if int_n_processes > 1:
+        pool = mp.Pool(processes=int_n_processes)
+        pFunc = partial(run_crispresso, crispresso_cmds, descriptor)
+        p_wrapper = partial(wrapper, pFunc)
     idxs = range(len(crispresso_cmds))
     ret_vals = [None] * len(crispresso_cmds)
-    pFunc = partial(run_crispresso, crispresso_cmds, descriptor)
-    p_wrapper = partial(wrapper, pFunc)
     if start_end_percent is not None:
         percent_complete_increment = start_end_percent[1] - start_end_percent[0]
         percent_complete_step = percent_complete_increment / len(crispresso_cmds)
@@ -109,14 +110,24 @@ def run_crispresso_cmds(crispresso_cmds, n_processes="1", descriptor = 'region',
     signal.signal(signal.SIGINT, original_sigint_handler)
     try:
         completed = 0
-        for idx, res in pool.imap_unordered(p_wrapper, enumerate(idxs)):
-            ret_vals[idx] = res
-            completed += 1
-            percent_complete += percent_complete_step
-            logger.info(
-                "Completed {0}/{1} runs".format(completed, len(crispresso_cmds)),
-                {'percent_complete': percent_complete},
-            )
+        if int_n_processes == 1:
+            for idx, cmd in enumerate(crispresso_cmds):
+                ret_vals[idx] = run_crispresso(crispresso_cmds, descriptor, idx)
+                completed += 1
+                percent_complete += percent_complete_step
+                logger.info(
+                    "Completed {0}/{1} runs".format(completed, len(crispresso_cmds)),
+                    {'percent_complete': percent_complete},
+                )
+        else:
+            for idx, res in pool.imap_unordered(p_wrapper, enumerate(idxs)):
+                ret_vals[idx] = res
+                completed += 1
+                percent_complete += percent_complete_step
+                logger.info(
+                    "Completed {0}/{1} runs".format(completed, len(crispresso_cmds)),
+                    {'percent_complete': percent_complete},
+                )
         for idx, ret in enumerate(ret_vals):
             if ret == 137:
                 raise Exception('CRISPResso %s #%d was killed by your system. Please decrease the number of processes (-p) and run again.'%(descriptor, idx))
@@ -135,8 +146,10 @@ def run_crispresso_cmds(crispresso_cmds, n_processes="1", descriptor = 'region',
         if descriptor.endswith("ch") or descriptor.endswith("sh"):
             plural = descriptor+"es"
         logger.info("Finished all " + plural)
-        pool.close()
-    pool.join()
+        if int_n_processes > 1:
+            pool.close()
+    if int_n_processes > 1:
+        pool.join()
 
 def run_pandas_apply_parallel(input_df, input_function_chunk, n_processes=1):
     """

From 8034b6d370a1f2d447974d2b3b2c8b414533d83d Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Fri, 14 Jun 2024 13:51:24 -0600
Subject: [PATCH 09/16] Fix logger issue

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index 2f00f528..ceee642a 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -29,17 +29,18 @@ def run_crispresso(crispresso_cmds, descriptor, idx):
     idx: index of the command to run
     """
     crispresso_cmd=crispresso_cmds[idx]
+    logger = logging.getLogger(getmodule(stack()[1][0]).__name__)
 
-    logging.info('Running CRISPResso on %s #%d/%d: %s' % (descriptor, idx, len(crispresso_cmds), crispresso_cmd))
+    logger.info('Running CRISPResso on %s #%d/%d: %s' % (descriptor, idx, len(crispresso_cmds), crispresso_cmd))
 
     return_value = sb.call(crispresso_cmd, shell=True)
 
     if return_value == 137:
-        logging.warn('CRISPResso was killed by your system (return value %d) on %s #%d: "%s"\nPlease reduce the number of processes (-p) and run again.'%(return_value, descriptor, idx, crispresso_cmd))
+        logger.warn('CRISPResso was killed by your system (return value %d) on %s #%d: "%s"\nPlease reduce the number of processes (-p) and run again.'%(return_value, descriptor, idx, crispresso_cmd))
     elif return_value != 0:
-        logging.warn('CRISPResso command failed (return value %d) on %s #%d: "%s"'%(return_value, descriptor, idx, crispresso_cmd))
+        logger.warn('CRISPResso command failed (return value %d) on %s #%d: "%s"'%(return_value, descriptor, idx, crispresso_cmd))
     else:
-        logging.info('Finished CRISPResso %s #%d' %(descriptor, idx))
+        logger.info('Finished CRISPResso %s #%d' %(descriptor, idx))
     return return_value
 
 

From 56af94c74dd319f7bd4801fe8e6f772e8b4dee7c Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Tue, 30 Jul 2024 13:24:37 -0600
Subject: [PATCH 10/16] Catchup

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 52 ++++++++++++++----------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index ceee642a..ca1f9171 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -207,28 +207,38 @@ def run_function_on_array_chunk_parallel(input_array, input_function, n_processe
     input_function: function to run on chunks of the array
         input_function should take in a smaller array of objects
     """
-    pool = mp.Pool(processes = n_processes)
-
-    #handle signals -- bug in python 2.7 (https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python)
-    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
-    signal.signal(signal.SIGINT, original_sigint_handler)
-    try:
-        n = int(max(10, len(input_array)/n_processes)) #don't parallelize unless at least 10 tasks
-        input_chunks = [input_array[i * n:(i + 1) * n] for i in range((len(input_array) + n - 1) // n )]
-        r = pool.map_async(input_function, input_chunks)
-        results = r.get(60*60*60) # Without the timeout this blocking call ignores all signals.
-    except KeyboardInterrupt:
-        pool.terminate()
-        logging.warn('Caught SIGINT. Program Terminated')
-        raise Exception('CRISPResso2 Terminated')
-        exit (0)
-    except Exception as e:
-        print('CRISPResso2 failed')
-        raise e
+    print("RUNNING NON-PARALLEL VERSION!!!!!!!")
+    if n_processes == 1:
+        print("NON-PARALLEL IF BRANCH!!")
+        try:
+            results = input_function(input_array)
+        except Exception as e:
+            print('CRISPResso2 failed')
+            raise e
+        return results
     else:
-        pool.close()
-    pool.join()
-    return [y for x in results for y in x]
+        pool = mp.Pool(processes = n_processes)
+
+        #handle signals -- bug in python 2.7 (https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python)
+        original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
+        signal.signal(signal.SIGINT, original_sigint_handler)
+        try:
+            n = int(max(10, len(input_array)/n_processes)) #don't parallelize unless at least 10 tasks
+            input_chunks = [input_array[i * n:(i + 1) * n] for i in range((len(input_array) + n - 1) // n )]
+            r = pool.map_async(input_function, input_chunks)
+            results = r.get(60*60*60) # Without the timeout this blocking call ignores all signals.
+        except KeyboardInterrupt:
+            pool.terminate()
+            logging.warn('Caught SIGINT. Program Terminated')
+            raise Exception('CRISPResso2 Terminated')
+            exit (0)
+        except Exception as e:
+            print('CRISPResso2 failed')
+            raise e
+        else:
+            pool.close()
+        pool.join()
+        return [y for x in results for y in x]
 
 
 

From fd2b258bb292ff1ac9e34b7e94f19b9ce223430a Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Thu, 8 Aug 2024 16:59:38 -0600
Subject: [PATCH 11/16] Remove extra print statements

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index ca1f9171..35fa02bd 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -207,9 +207,7 @@ def run_function_on_array_chunk_parallel(input_array, input_function, n_processe
     input_function: function to run on chunks of the array
         input_function should take in a smaller array of objects
     """
-    print("RUNNING NON-PARALLEL VERSION!!!!!!!")
     if n_processes == 1:
-        print("NON-PARALLEL IF BRANCH!!")
         try:
             results = input_function(input_array)
         except Exception as e:

From f79bf760904bf1d3d0a59e0a34aaaa49720dc91f Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 9 Aug 2024 13:07:48 -0600
Subject: [PATCH 12/16] Restrict generation of multiprocessing pool to when
 n_processes > 1

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index 35fa02bd..8bbe32b7 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -177,7 +177,10 @@ def input_function_chunk(df):
     #shuffle the dataset to avoid finishing all the ones on top while leaving the ones on the bottom unfinished
     n_splits = min(n_processes, len(input_df))
     df_split = np.array_split(input_df.sample(frac=1), n_splits)
-    pool = mp.Pool(processes = n_splits)
+    if n_processes > 1:
+        pool = mp.Pool(processes = n_splits)
+    else:
+        return input_function_chunk(input_df)
 
     #handle signals -- bug in python 2.7 (https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python)
     original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
@@ -243,12 +246,19 @@ def run_function_on_array_chunk_parallel(input_array, input_function, n_processe
 def run_subprocess(cmd):
     return sb.call(cmd, shell=True)
 
-def run_parallel_commands(commands_arr,n_processes=1,descriptor='CRISPResso2',continue_on_fail=False):
+def run_parallel_commands(commands_arr, n_processes=1, descriptor='CRISPResso2', continue_on_fail=False):
     """
     input: commands_arr: list of shell commands to run
     descriptor: string to print out to user describing run
     """
-    pool = mp.Pool(processes = n_processes)
+    if n_processes > 1:
+        pool = mp.Pool(processes = n_processes)
+    else:
+        for idx, command in enumerate(commands_arr):
+            return_value = run_subprocess(command)
+            if ret != 0 and not continue_on_fail:
+                raise Exception(f'{descriptor} #{idx} was failed')
+        return
 
     #handle signals -- bug in python 2.7 (https://stackoverflow.com/questions/11312525/catch-ctrlc-sigint-and-exit-multiprocesses-gracefully-in-python)
     original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
@@ -303,4 +313,3 @@ def run_plot(plot_func, plot_args, num_processes, process_futures, process_pool)
     except Exception as e:
         logger.warn(f"Plot error {e}, skipping plot \n")
         logger.debug(traceback.format_exc())
-

From 239b6e1f280d6245a4395537eb96db2e913f97fb Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 9 Aug 2024 13:30:35 -0600
Subject: [PATCH 13/16] Switch test branch to version bump

---
 .github/workflows/integration_tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index ab66a06a..769397a8 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -64,6 +64,7 @@ jobs:
         uses: actions/checkout@v3
         with:
           repository: edilytics/CRISPResso2_tests
+          ref: 'cole/bump-version'
           # ref: '<BRANCH-NAME>' # update to specific branch
 
       - name: Run Basic

From 6833aac77efe35a2e8d6dc329be822b18fdfb5f4 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 9 Aug 2024 13:39:53 -0600
Subject: [PATCH 14/16] Fix variable name error

---
 CRISPResso2/CRISPRessoMultiProcessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CRISPResso2/CRISPRessoMultiProcessing.py b/CRISPResso2/CRISPRessoMultiProcessing.py
index 8bbe32b7..dba4c1ad 100644
--- a/CRISPResso2/CRISPRessoMultiProcessing.py
+++ b/CRISPResso2/CRISPRessoMultiProcessing.py
@@ -256,7 +256,7 @@ def run_parallel_commands(commands_arr, n_processes=1, descriptor='CRISPResso2',
     else:
         for idx, command in enumerate(commands_arr):
             return_value = run_subprocess(command)
-            if ret != 0 and not continue_on_fail:
+            if return_value != 0 and not continue_on_fail:
                 raise Exception(f'{descriptor} #{idx} was failed')
         return
 

From b6df0a53d177ae9c72da843fd10293449b830764 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 9 Aug 2024 15:11:32 -0600
Subject: [PATCH 15/16] Change test branch back to master

---
 .github/workflows/integration_tests.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index 769397a8..ab66a06a 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -64,7 +64,6 @@ jobs:
         uses: actions/checkout@v3
         with:
           repository: edilytics/CRISPResso2_tests
-          ref: 'cole/bump-version'
           # ref: '<BRANCH-NAME>' # update to specific branch
 
       - name: Run Basic

From 876abc247de63ad66ff30a28c3b11ce754dbcb07 Mon Sep 17 00:00:00 2001
From: Cole Lyman <Cole@colelyman.com>
Date: Fri, 9 Aug 2024 15:27:33 -0600
Subject: [PATCH 16/16] Fix CRISPRessoAggregate bug and other improvements
 (#95)

* D3-Enhancements (#78)

* Sam/try plots (#71)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as data in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Point test to try-plots

* Fix d3 not showing and plotly mixing with matplotlib

* Use logger for warnings and debug statements

* Point tests back at master

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

* Sam/fix plots (#72)

* Fix batch mode pandas warning. (#70)

* refactor to call method on DataFrame, rather than Series.
Removes warning.

* Fix pandas future warning in CRISPRessoWGS

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

* Functional

* Cole/fix status file name (#69)

* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition if no config file was provided. Also changes `json`
to `config` so that it is more clear.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with pro with multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as data in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>

* Try catch all futures

* Fix test fail plots

* Fix d3 not showing and plotly mixing with matplotlib

---------

Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

* Remove token from integration tests file

* Provide sgRNA_sequences to plot_nucleotide_quilt plots

* Passing sgRNA_sequences to plot

* Refactor check for determining when to use CRISPREssoPro or matplotlib for Batch plots

* Add max-height to Batch report samples

* Change testing branch

* Fix wrong check for large Batch plots

* Fix typo and move flexiguide to debug (#77)

* Change flexiguide output to debug level

* Fix typo in fastp merged output file name

* Adding id tags for d3 script enhancements

* pointing to test branch

* Add amplicon_name parameter to allele heatmap and line plots

* Add function to extract quantification window regions from include_idxs

* Scale the quantification window according to the coordinates of the sgRNA plot

* added c2pro check, added space in args.json

* Correct the quantification window indexes for multiple guides

* Fix name of nucleotide conversion plot when guides are not the same

* Fix jinja variables that aren't found

* Fix multiple guide errors where the wrong sgRNA sequence was associated in d3 plot

* Remove unneeded variable and extra whitespace

* Switch test branch to master

---------

Co-authored-by: Samuel Nichols <Snic9004@gmail.com>
Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Co-authored-by: Cole Lyman <cole@colelyman.com>

* Add amplicon_name to plot functions

* Add sgRNA sequences to nucleotide quilt parameters in Aggregate

* Add custom_colors to Aggregate plot functions

* Update Aggregate and make_aggregate_report to have logger and tool

* Write command_used to Aggregate .json info file

* Point to new test branch and add Aggregate run

* Make the order of Aggregate runs explicit

* Sort all instances of crispresso2_folder_info in Aggregate

* Sort df_summary_quantification df in Aggregate

* Try sorting with a list of single column

* Update to correct test branch

* Move to master test branch

---------

Co-authored-by: Trevor Martin <60452953+trevormartinj7@users.noreply.github.com>
Co-authored-by: Samuel Nichols <Snic9004@gmail.com>
Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
---
 .github/workflows/integration_tests.yml       |  5 ++++
 CRISPResso2/CRISPRessoAggregateCORE.py        | 29 ++++++++++++++-----
 .../CRISPRessoReports/CRISPRessoReport.py     |  4 +++
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index ab66a06a..ff91670b 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -114,3 +114,8 @@ jobs:
         if: success() || failure()
         run: |
           make compare test print
+
+      - name: Run Aggregate
+        if: success() || failure()
+        run: |
+          make aggregate test print
diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
index 7e5b9b0d..d319445d 100644
--- a/CRISPResso2/CRISPRessoAggregateCORE.py
+++ b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -109,6 +109,7 @@ def main():
         crispresso2_info = {'running_info': {}, 'results': {'alignment_stats': {}, 'general_plots': {}}} #keep track of all information for this run to be pickled and saved at the end of the run
         crispresso2_info['running_info']['version'] = CRISPRessoShared.__version__
         crispresso2_info['running_info']['args'] = deepcopy(args)
+        crispresso2_info['running_info']['command_used'] = ' '.join(sys.argv)
 
         crispresso2_info['running_info']['log_filename'] = os.path.basename(log_filename)
 
@@ -227,7 +228,7 @@ def main():
 
         if successfully_imported_count > 0:
 
-            crispresso2_folders = crispresso2_folder_infos.keys()
+            crispresso2_folders = list(sorted(crispresso2_folder_infos.keys()))
             crispresso2_folder_names = {}
             crispresso2_folder_htmls = {}#file_loc->html folder loc
             quilt_plots_to_show = {}  # name->{'href':path to report, 'img': png}
@@ -515,8 +516,10 @@ def main():
                                     'fig_filename_root': this_window_nuc_pct_quilt_plot_name,
                                     'save_also_png': save_png,
                                     'sgRNA_intervals': sub_sgRNA_intervals,
+                                    'sgRNA_sequences': consensus_guides,
                                     'quantification_window_idxs': include_idxs,
                                     'group_column': 'Folder',
+                                    'custom_colors': None,
                                 }
                                 plot(
                                     CRISPRessoPlot.plot_nucleotide_quilt,
@@ -550,8 +553,10 @@ def main():
                                     'fig_filename_root': this_nuc_pct_quilt_plot_name,
                                     'save_also_png': save_png,
                                     'sgRNA_intervals': consensus_sgRNA_intervals,
+                                    'sgRNA_sequences': consensus_guides,
                                     'quantification_window_idxs': include_idxs,
                                     'group_column': 'Folder',
+                                    'custom_colors': None,
                                 }
                                 plot(
                                     CRISPRessoPlot.plot_nucleotide_quilt,
@@ -589,8 +594,10 @@ def main():
                                     'fig_filename_root': this_nuc_pct_quilt_plot_name,
                                     'save_also_png': save_png,
                                     'sgRNA_intervals': consensus_sgRNA_intervals,
+                                    'sgRNA_sequences': consensus_guides,
                                     'quantification_window_idxs': consensus_include_idxs,
                                     'group_column': 'Folder',
+                                    'custom_colors': None,
                                 }
                                 plot(
                                     CRISPRessoPlot.plot_nucleotide_quilt,
@@ -654,6 +661,7 @@ def main():
                                 'plot_path': plot_path,
                                 'title': modification_type,
                                 'div_id': heatmap_div_id,
+                                'amplicon_name': amplicon_name,
                             }
                             plot(
                                 CRISPRessoPlot.plot_allele_modification_heatmap,
@@ -687,6 +695,7 @@ def main():
                                 'plot_path': plot_path,
                                 'title': modification_type,
                                 'div_id': line_div_id,
+                                'amplicon_name': amplicon_name,
                             }
                             plot(
                                 CRISPRessoPlot.plot_allele_modification_line,
@@ -779,7 +788,7 @@ def main():
 
             header = 'Name\tUnmodified%\tModified%\tReads_total\tReads_aligned\tUnmodified\tModified\tDiscarded\tInsertions\tDeletions\tSubstitutions\tOnly Insertions\tOnly Deletions\tOnly Substitutions\tInsertions and Deletions\tInsertions and Substitutions\tDeletions and Substitutions\tInsertions Deletions and Substitutions'
             header_els = header.split("\t")
-            df_summary_quantification=pd.DataFrame(quantification_summary, columns=header_els)
+            df_summary_quantification=pd.DataFrame(quantification_summary, columns=header_els).sort_values(by=['Name'])
             samples_quantification_summary_filename = _jp('CRISPRessoAggregate_quantification_of_editing_frequency.txt') #this file has one line for each run (sum of all amplicons)
             df_summary_quantification.fillna('NA').to_csv(samples_quantification_summary_filename, sep='\t', index=None)
             crispresso2_info['results']['alignment_stats']['samples_quantification_summary_filename'] = os.path.basename(samples_quantification_summary_filename)
@@ -841,11 +850,17 @@ def main():
                 report_filename = OUTPUT_DIRECTORY+'.html'
                 if (args.place_report_in_output_folder):
                     report_filename = _jp("CRISPResso2Aggregate_report.html")
-                CRISPRessoReport.make_aggregate_report(crispresso2_info, args.name,
-                                                       report_filename, OUTPUT_DIRECTORY,
-                                                       _ROOT, crispresso2_folders,
-                                                       crispresso2_folder_htmls,
-                                                       quilt_plots_to_show)
+                CRISPRessoReport.make_aggregate_report(
+                    crispresso2_info,
+                    args.name,
+                    report_filename,
+                    OUTPUT_DIRECTORY,
+                    _ROOT,
+                    crispresso2_folders,
+                    crispresso2_folder_htmls,
+                    logger,
+                    compact_plots_to_show=quilt_plots_to_show,
+                )
                 crispresso2_info['running_info']['report_location'] = report_filename
                 crispresso2_info['running_info']['report_filename'] = os.path.basename(report_filename)
         else: #no files successfully imported
diff --git a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
index eb16730a..c6860fdc 100644
--- a/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
+++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py
@@ -646,6 +646,7 @@ def make_aggregate_report(
     _ROOT,
     folder_arr,
     crispresso_html_reports,
+    logger,
     compact_plots_to_show=None,
     display_names=None,
 ):
@@ -660,6 +661,7 @@ def make_aggregate_report(
     _ROOT (string): location of crispresso assets (images, templates, etc)
     folder_arr (arr of strings): paths to the aggregated crispresso folders
     crispresso_html_reports (dict): folder->html_path; Paths to the aggregated crispresso run html reports
+    logger (logging.Logger): logger to log messages
     compact_plots_to_show (dict): name=>{'href': path to target(report) when user clicks on image, 'img': path to png image to show}
     display_names (dict): folder->display_name; Titles to be shown for crispresso runs
         (if different from names_arr, e.g. if display_names have spaces or bad chars, they won't be the same as names_arr)
@@ -778,6 +780,8 @@ def make_aggregate_report(
         crispresso_report_folder,
         _ROOT,
         report_name,
+        'aggregate',
+        logger,
         window_nuc_pct_quilts=window_nuc_pct_quilts,
         nuc_pct_quilts=nuc_pct_quilts,
         summary_plots=summary_plots,