From 020c6cc5b3778d0e1b291a1b802c3158e186bd97 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Tue, 23 Apr 2024 16:13:50 -0600
Subject: [PATCH 01/14] Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a separate warning for when no config file was provided. Also
changes `json` to `config` in the message so that it is clearer.
---
 CRISPResso2/CRISPRessoShared.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py
index 036d48d9..4af227fe 100644
--- a/CRISPResso2/CRISPRessoShared.py
+++ b/CRISPResso2/CRISPRessoShared.py
@@ -1856,9 +1856,11 @@ def check_custom_config(args):
                 custom_config['colors'] = config['colors']
 
             return custom_config
-        except Exception as e:
-            logger.warn("Cannot read json file '%s', defaulting config parameters." % args.config_file)
-            print(e)
+        except Exception:
+            if args.config_file:
+                logger.warn("Cannot read config file '%s', defaulting config parameters." % args.config_file)
+            else:
+                logger.warn("No config file provided, defaulting config parameters.")
     return config
 
 

From 80a82a6d6a004b29d43655b6be89d9c5a90d101a Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 24 Apr 2024 13:01:09 -0600
Subject: [PATCH 02/14] Fix divide by zero when no amplicons are present in
 Batch mode

---
 CRISPResso2/CRISPRessoBatchCORE.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index 0e198d66..fee6feed 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -398,7 +398,10 @@ def main():
         large_plot_cutoff = 300
 
         percent_complete_start, percent_complete_end = 90, 99
-        percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
+        if all_amplicons:
+            percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
+        else:
+            percent_complete_step = 0
         # report for amplicons
         for amplicon_index, amplicon_seq in enumerate(all_amplicons):
             # only perform comparison if amplicon seen in more than one sample

From b3f8f2da4f583880c241ffd1a47237c88413ab2c Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 24 Apr 2024 13:11:07 -0600
Subject: [PATCH 03/14] Don't append file_prefix to status file name

---
 CRISPResso2/CRISPRessoAggregateCORE.py        | 4 ++--
 CRISPResso2/CRISPRessoCORE.py                 | 2 +-
 CRISPResso2/CRISPRessoCompareCORE.py          | 2 +-
 CRISPResso2/CRISPRessoMetaCORE.py             | 2 +-
 CRISPResso2/CRISPRessoPooledCORE.py           | 2 +-
 CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 2 +-
 CRISPResso2/CRISPRessoWGSCORE.py              | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
index b521dac7..10e36a43 100644
--- a/CRISPResso2/CRISPRessoAggregateCORE.py
+++ b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -71,7 +71,7 @@ def main():
 
         parser.add_argument('--debug', help='Show debug messages', action='store_true')
         parser.add_argument('-v', '--verbosity', type=int, help='Verbosity level of output to the console (1-4), 4 is the most verbose', default=3)
-        
+
         # CRISPRessoPro params
         parser.add_argument('--use_matplotlib', action='store_true',
                         help='Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed')
@@ -98,7 +98,7 @@ def main():
 
         log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoAggregate_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoAggregate_status.json'))
 
         with open(log_filename, 'w+') as outfile:
               outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index b3dd67c8..92ed7cc7 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug():
             with open(log_filename, 'w+') as outfile:
                 outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write))
 
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPResso_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPResso_status.json'))
 
         aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL")
         CRISPRessoShared.check_file(aln_matrix_loc)
diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index 718d4442..9d8041a4 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -142,7 +142,7 @@ def main():
 
         log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoCompare_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoCompare_status.json'))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write('[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py
index 5fff6ab6..a771e24f 100644
--- a/CRISPResso2/CRISPRessoMetaCORE.py
+++ b/CRISPResso2/CRISPRessoMetaCORE.py
@@ -233,7 +233,7 @@ def main():
 
         log_filename=_jp('CRISPRessoMeta_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoMeta_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoMeta_status.json'))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py
index 124ec705..8f7305c7 100644
--- a/CRISPResso2/CRISPRessoPooledCORE.py
+++ b/CRISPResso2/CRISPRessoPooledCORE.py
@@ -327,7 +327,7 @@ def main():
 
         log_filename = _jp('CRISPRessoPooled_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooled_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooled_status.json'))
 
         if args.zip_output and not args.place_report_in_output_folder:
             logger.warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. Setting place_report_in_output_folder to True.')
diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
index b830e222..bfb22370 100644
--- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
+++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
@@ -231,7 +231,7 @@ def main():
 
         log_filename = _jp('CRISPRessoPooledWGSCompare_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooledWGSCompare_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooledWGSCompare_status.json'))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write(
diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py
index e8042c01..aea9dbfb 100644
--- a/CRISPResso2/CRISPRessoWGSCORE.py
+++ b/CRISPResso2/CRISPRessoWGSCORE.py
@@ -347,7 +347,7 @@ def print_stacktrace_if_debug():
         except:
             warn('Folder %s already exists.' % OUTPUT_DIRECTORY)
 
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoWGS_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoWGS_status.json'))
 
         info('Checking dependencies...')
 

From 4390b862150658040ce86b4e18c368a584de522f Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 24 Apr 2024 13:53:53 -0600
Subject: [PATCH 04/14] Place status files in output directories

---
 CRISPResso2/CRISPRessoAggregateCORE.py        | 2 +-
 CRISPResso2/CRISPRessoBatchCORE.py            | 2 +-
 CRISPResso2/CRISPRessoCORE.py                 | 2 +-
 CRISPResso2/CRISPRessoCompareCORE.py          | 2 +-
 CRISPResso2/CRISPRessoMetaCORE.py             | 2 +-
 CRISPResso2/CRISPRessoPooledCORE.py           | 2 +-
 CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 2 +-
 CRISPResso2/CRISPRessoWGSCORE.py              | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
index 10e36a43..5cd4cba9 100644
--- a/CRISPResso2/CRISPRessoAggregateCORE.py
+++ b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -98,7 +98,7 @@ def main():
 
         log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoAggregate_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoAggregate_status.json')))
 
         with open(log_filename, 'w+') as outfile:
               outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index fee6feed..0dfb5953 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -127,7 +127,7 @@ def main():
 
         log_filename = _jp('CRISPRessoBatch_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        status_handler = CRISPRessoShared.StatusHandler(_jp('CRISPRessoBatch_status.json'))
+        status_handler = CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoBatch_status.json'))
         logger.addHandler(status_handler)
 
         with open(log_filename, 'w+') as outfile:
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index 92ed7cc7..a1590604 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug():
             with open(log_filename, 'w+') as outfile:
                 outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write))
 
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPResso_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_status.json')))
 
         aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL")
         CRISPRessoShared.check_file(aln_matrix_loc)
diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index 9d8041a4..a47a05e0 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -142,7 +142,7 @@ def main():
 
         log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoCompare_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoCompare_status.json')))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write('[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py
index a771e24f..4afd6d8d 100644
--- a/CRISPResso2/CRISPRessoMetaCORE.py
+++ b/CRISPResso2/CRISPRessoMetaCORE.py
@@ -233,7 +233,7 @@ def main():
 
         log_filename=_jp('CRISPRessoMeta_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoMeta_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoMeta_status.json')))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py
index 8f7305c7..b2fe7207 100644
--- a/CRISPResso2/CRISPRessoPooledCORE.py
+++ b/CRISPResso2/CRISPRessoPooledCORE.py
@@ -327,7 +327,7 @@ def main():
 
         log_filename = _jp('CRISPRessoPooled_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooled_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooled_status.json')))
 
         if args.zip_output and not args.place_report_in_output_folder:
             logger.warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. Setting place_report_in_output_folder to True.')
diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
index bfb22370..538afc9f 100644
--- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
+++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
@@ -231,7 +231,7 @@ def main():
 
         log_filename = _jp('CRISPRessoPooledWGSCompare_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooledWGSCompare_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooledWGSCompare_status.json')))
 
         with open(log_filename, 'w+') as outfile:
             outfile.write(
diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py
index aea9dbfb..ccb27df9 100644
--- a/CRISPResso2/CRISPRessoWGSCORE.py
+++ b/CRISPResso2/CRISPRessoWGSCORE.py
@@ -347,7 +347,7 @@ def print_stacktrace_if_debug():
         except:
             warn('Folder %s already exists.' % OUTPUT_DIRECTORY)
 
-        logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoWGS_status.json'))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoWGS_status.json')))
 
         info('Checking dependencies...')
 

From b9daad380e79634e345913a6235ba913d8add22d Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 24 Apr 2024 16:12:47 -0600
Subject: [PATCH 05/14] Update tests branch for file_prefix addition

---
 .github/workflows/integration_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index 7d968708..02b60904 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -46,7 +46,7 @@ jobs:
       with:
         repository: edilytics/CRISPResso2_tests
         token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }}
-        # ref: '<BRANCH-NAME>' # Use this to specify a branch other than master
+        ref: 'cole/add-file-prefix-to-batch' # Use this to specify a branch other than master
 
     - name: Run Basic
       run: |

From efe18d915dd9c1638348c37bd54d8b73e0bb8e8c Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Thu, 25 Apr 2024 10:34:33 -0600
Subject: [PATCH 06/14] Load D3 and plotly figures with pro with multiple
 amplicons

---
 CRISPResso2/CRISPRessoBatchCORE.py                   | 12 ++++++------
 .../CRISPRessoReports/templates/batchReport.html     | 12 ++----------
 .../templates/shared/partials/fig_summaries.html     |  1 +
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index 0dfb5953..b9c85099 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -607,7 +607,7 @@ def main():
                                 # and add it to the list
                                 sub_sgRNA_intervals.append((newstart, newend))
 
-                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
+                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
                             nucleotide_quilt_input = {
                                 'nuc_pct_df': sub_nucleotide_percentage_summary_df,
                                 'mod_pct_df': sub_modification_percentage_summary_df,
@@ -620,13 +620,11 @@ def main():
                             debug('Plotting nucleotide percentage quilt for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA))
                             plot(
                                 CRISPRessoPlot.plot_nucleotide_quilt,
-                                nucleotide_quilt_input,
+                                nucleotide_quilt_input, 
                             )
                             plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name)
                             window_nuc_pct_quilt_plot_names.append(plot_name)
                             crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'sgRNA: ' + sgRNA + ' Amplicon: ' + amplicon_name
-                            if len(consensus_guides) == 1:
-                                crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = ''
                             crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name
                             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
 
@@ -659,7 +657,7 @@ def main():
                         # done with per-sgRNA plots
 
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:  # plot the whole region
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,
@@ -709,7 +707,7 @@ def main():
 
                 else:  # guides are not the same
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,
@@ -778,6 +776,7 @@ def main():
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
+                            'amplicon_name': amplicon_name,
                         }
                         debug('Plotting allele modification heatmap for {0}'.format(amplicon_name))
                         plot(
@@ -809,6 +808,7 @@ def main():
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
+                            'amplicon_name': amplicon_name,
                         }
                         debug('Plotting allele modification line plot for {0}'.format(amplicon_name))
                         plot(
diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
index fe4a0200..59bd6f5b 100644
--- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html
+++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html
@@ -77,11 +77,7 @@ <h5>Nucleotide percentages around guides</h5>
               <div class='card-body'>
                 {% for plot_name in window_nuc_pct_quilts %}
                   <h5>{{report_data['titles'][plot_name]}}</h5>
-                  {% if plot_name in report_data['htmls'] %}
-                    {{ report_data['htmls'][plot_name]|safe }}
-                  {% else %}
-                    {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }}
-                  {% endif %}
+                  {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }}
                 {% endfor %}
               </div>
             </div>
@@ -95,11 +91,7 @@ <h5>Nucleotide percentages in the entire amplicon</h5>
               <div class='card-body'>
                 {% for plot_name in nuc_pct_quilts %}
                   <h5>{{report_data['titles'][plot_name]}}</h5>
-                  {% if plot_name in report_data['htmls'] %}
-                    {{ report_data['htmls'][plot_name]|safe }}
-                  {% else %}
-                    {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }}
-                  {% endif %}
+                  {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }}
                 {% endfor %}
               </div>
             </div>
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html
index 63d40dea..d909a0b2 100644
--- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html
+++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html
@@ -12,4 +12,5 @@
     {% for (data_label,data_path) in report_data['datas'][plot_name] %}
         <p class="m-0"><small>Data: <a href="{{report_data['crispresso_data_path']}}{{data_path}}">{{data_label}}</a></small></p>
     {% endfor %}
+    <br>
 </div>

From a49639da816109e1d87a073f9b3206022c30e142 Mon Sep 17 00:00:00 2001
From: Sam <snic9004@gmail.com>
Date: Thu, 25 Apr 2024 14:38:16 -0600
Subject: [PATCH 07/14] Update batch

---
 CRISPResso2/CRISPRessoBatchCORE.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index b9c85099..e27e93f3 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -607,7 +607,7 @@ def main():
                                 # and add it to the list
                                 sub_sgRNA_intervals.append((newstart, newend))
 
-                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
+                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
                             nucleotide_quilt_input = {
                                 'nuc_pct_df': sub_nucleotide_percentage_summary_df,
                                 'mod_pct_df': sub_modification_percentage_summary_df,
@@ -657,7 +657,7 @@ def main():
                         # done with per-sgRNA plots
 
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:  # plot the whole region
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,
@@ -707,7 +707,7 @@ def main():
 
                 else:  # guides are not the same
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,

From 62cf9fc4e224225e7b4d90b47fede4ae493f7d4e Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Thu, 25 Apr 2024 09:18:59 -0600
Subject: [PATCH 08/14] Fix bug in CRISPRessoCompare with pointing to report
 datas with file_prefix

Before this fix, when using a file_prefix, the second run that was
compared would not be displayed as a data link under the first figure of
the report.
---
 CRISPResso2/CRISPRessoCompareCORE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index a47a05e0..48b313cb 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -238,7 +238,7 @@ def get_plot_title_with_ref_name(plotTitle, refName):
             crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Editing efficiency comparison'
             crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Figure 1: Comparison for amplicon ' + amplicon_name + '; Left: Percentage of modified and unmodified reads in each sample; Right: relative percentage of modified and unmodified reads'
             output_1 = os.path.join(args.crispresso_output_folder_1, run_info_1['running_info']['report_filename'])
-            output_2 = os.path.join(args.crispresso_output_folder_1, run_info_2['running_info']['report_filename'])
+            output_2 = os.path.join(args.crispresso_output_folder_2, run_info_2['running_info']['report_filename'])
             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = []
             if os.path.isfile(output_1):
                 crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name].append((sample_1_name +' output', os.path.relpath(output_1, OUTPUT_DIRECTORY)))

From cddcf5953822ef03c8762792291a9c3e8139c388 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 26 Apr 2024 13:48:25 -0600
Subject: [PATCH 09/14] Import CRISPRessoPro instead of importing the version

When CRISPRessoPro is installed via conda, its `__version__` is not
available for import.
---
 CRISPResso2/CRISPRessoBatchCORE.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index e27e93f3..53211012 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -18,7 +18,7 @@
 from CRISPResso2.CRISPRessoReports import CRISPRessoReport
 
 if CRISPRessoShared.is_C2Pro_installed():
-    from CRISPRessoPro import __version__ as CRISPRessoProVersion
+    import CRISPRessoPro
     C2PRO_INSTALLED = True
 else:
     C2PRO_INSTALLED = False

From c7c0ab2bb5465c0e5317adb52bf1e3dd5f1ddc56 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 26 Apr 2024 13:49:22 -0600
Subject: [PATCH 10/14] Remove `get_amplicon_output` unused function from
 CRISPRessoCompare

Also remove unused argparse import
---
 CRISPResso2/CRISPRessoCompareCORE.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index 48b313cb..1e55c342 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -8,7 +8,6 @@
 from copy import deepcopy
 import sys
 import traceback
-import argparse
 from CRISPResso2 import CRISPRessoShared
 from CRISPResso2.CRISPRessoReports import CRISPRessoReport
 
@@ -32,13 +31,6 @@ def check_library(library_name):
                 sys.exit(1)
 
 
-def get_amplicon_output(amplicon_name, output_folder):
-    profile_file=os.path.join(output_folder, amplicon_name+'.effect_vector_combined.txt')
-    if os.path.exists(quantification_file) and profile_file:
-        return quantification_file, profile_file
-    else:
-        raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder, profile_file, amplicon_name))
-
 def parse_profile(profile_file):
     return np.loadtxt(profile_file, skiprows=1)
 

From 43974a2aba9c5c699eda3328187b9da43adcbe8d Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 26 Apr 2024 13:55:13 -0600
Subject: [PATCH 11/14] Implement `get_matching_allele_files` in
 CRISPRessoCompare and accompanying unit tests

---
 CRISPResso2/CRISPRessoCompareCORE.py          | 133 ++++++++++--------
 .../unit_tests/test_CRISPRessoCompareCORE.py  |  65 +++++++++
 2 files changed, 142 insertions(+), 56 deletions(-)
 create mode 100644 tests/unit_tests/test_CRISPRessoCompareCORE.py

diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index 1e55c342..55b758e4 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -63,6 +63,33 @@ def normalize_name(name, output_folder_1, output_folder_2):
         return name
 
 
+def get_matching_allele_files(run_info_1, run_info_2):
+    def get_amplicon_info(run_info):
+        return {
+            amplicon['sequence']: {
+                'name': amplicon_name,
+                'guides': amplicon['sgRNA_orig_sequences'],
+                'cut_points': amplicon['sgRNA_cut_points'],
+                'allele_files': amplicon['allele_frequency_files'],
+            }
+            for amplicon_name, amplicon in run_info['results']['refs'].items()
+        }
+    amplicons_1 = get_amplicon_info(run_info_1)
+    amplicons_2 = get_amplicon_info(run_info_2)
+    matching_allele_files = []
+    for sequence_1 in amplicons_1:
+        if sequence_1 in amplicons_2:
+            if amplicons_1[sequence_1]['guides'] != amplicons_2[sequence_1]['guides']:
+                warn(f'Report 1 has different guides than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison')
+                continue
+            if amplicons_1[sequence_1]['cut_points'] != amplicons_2[sequence_1]['cut_points']:
+                warn(f'Report 1 has different cut points than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison')
+                continue
+            matching_allele_files.extend((f_1, f_2) for f_1, f_2 in zip(amplicons_1[sequence_1]['allele_files'], amplicons_2[sequence_1]['allele_files']))
+
+    return matching_allele_files
+
+
 def main():
     try:
         description = ['~~~CRISPRessoCompare~~~', '-Comparison of two CRISPResso analyses-']
@@ -342,62 +369,56 @@ def get_plot_title_with_ref_name(plotTitle, refName):
 
 
             #create merged heatmaps for each cut site
-            allele_files_1 = amplicon_info_1[amplicon_name]['allele_files']
-            allele_files_2 = amplicon_info_2[amplicon_name]['allele_files']
-            for allele_file_1 in allele_files_1:
-                allele_file_1_name = os.path.split(allele_file_1)[1] #get file part of path
-                for allele_file_2 in allele_files_2:
-                    allele_file_2_name = os.path.split(allele_file_2)[1] #get file part of path
-                    #if files are the same (same amplicon, cut site, guide), run comparison
-                    if allele_file_1_name == allele_file_2_name:
-                        df1 = pd.read_csv(allele_file_1, sep="\t")
-                        df2 = pd.read_csv(allele_file_2, sep="\t")
-
-                        #find unmodified reference for comparison (if it exists)
-                        ref_seq_around_cut = ""
-                        if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0:
-                            ref_seq_around_cut = df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0]
-                        #otherwise figure out which sgRNA was used for this comparison
-                        elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0:
-                            ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0]
-                        else:
-                            seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0]
-                            for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points):
-                                sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]]
-                                if sgRNA_seq in allele_file_1_name:
-                                    this_sgRNA_seq = sgRNA_seq
-                                    this_cut_point = cut_point
-                                    ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)]
-                                    break
-
-                        merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, '_'+sample_2_name), how='outer')
-                        quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name]
-                        merged[quant_cols] = merged[quant_cols].fillna(0)
-                        lfc_error =0.1
-                        merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0)
-                        merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False)
-                        merged = merged.reset_index(drop=True).set_index('Aligned_Sequence')
-                        output_root = allele_file_1_name.replace(".txt", "")
-                        allele_comparison_file = _jp(output_root+'.txt')
-                        merged.to_csv(allele_comparison_file, sep="\t", index=None)
-
-                        plot_name = '3.'+output_root+'_top'
-                        CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name),
-                                    MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png)
-                        crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name)
-                        crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name
-                        crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \
-                        'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_1_name+'.'
-                        crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))]
-
-                        plot_name = '3.'+output_root+'_bottom'
-                        CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name),
-                                    MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png)
-                        crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name)
-                        crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name
-                        crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \
-                        'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.'
-                        crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))]
+            matching_allele_files = get_matching_allele_files(run_info_1, run_info_2)
+            for allele_file_1, allele_file_2 in matching_allele_files:
+                df1 = pd.read_csv(os.path.join(args.crispresso_output_folder_1, allele_file_1), sep="\t")
+                df2 = pd.read_csv(os.path.join(args.crispresso_output_folder_2, allele_file_2), sep="\t")
+
+                #find unmodified reference for comparison (if it exists)
+                ref_seq_around_cut = ""
+                if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0:
+                    ref_seq_around_cut = df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0]
+                #otherwise use the unmodified reference from the second sample, or else figure out which sgRNA was used for this comparison
+                elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0:
+                    ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0]
+                else:
+                    seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0]
+                    for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points):
+                        sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]]
+                        if sgRNA_seq in allele_file_1:
+                            this_sgRNA_seq = sgRNA_seq
+                            this_cut_point = cut_point
+                            ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)]
+                            break
+
+                merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, '_'+sample_2_name), how='outer')
+                quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name]
+                merged[quant_cols] = merged[quant_cols].fillna(0)
+                lfc_error =0.1
+                merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0)
+                merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False)
+                merged = merged.reset_index(drop=True).set_index('Aligned_Sequence')
+                args.crispresso_output_folder_root = os.path.split(allele_file_1)[1].replace(".txt", "")
+                allele_comparison_file = _jp(args.crispresso_output_folder_root+'.txt')
+                merged.to_csv(allele_comparison_file, sep="\t", index=None)
+
+                plot_name = '3.'+args.crispresso_output_folder_root+'_top'
+                CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name),
+                            MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png)
+                crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name)
+                crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name
+                crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \
+                'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_1_name+'.'
+                crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))]
+
+                plot_name = '3.'+args.crispresso_output_folder_root+'_bottom'
+                CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name),
+                            MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png)
+                crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name)
+                crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name
+                crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \
+                'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.'
+                crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))]
 
         debug('Calculating significant base counts...', {'percent_complete': 95})
         sig_counts_filename = _jp('CRISPRessoCompare_significant_base_counts.txt')
diff --git a/tests/unit_tests/test_CRISPRessoCompareCORE.py b/tests/unit_tests/test_CRISPRessoCompareCORE.py
new file mode 100644
index 00000000..9bbe7e16
--- /dev/null
+++ b/tests/unit_tests/test_CRISPRessoCompareCORE.py
@@ -0,0 +1,65 @@
+"""Unit tests for CRISPRessoCompareCORE."""
+
+from CRISPResso2 import CRISPRessoCompareCORE
+
+from copy import deepcopy
+import pytest
+
+
+@pytest.fixture(scope='function')
+def run_info():
+    return {
+        'results': {
+            'refs': {
+                'Reference': {
+                    'sequence':'CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGG',
+                    'sgRNA_orig_sequences': ['GGCCCTTAAAA'],
+                    'sgRNA_cut_points': [50],
+                    'allele_frequency_files': ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'],
+                },
+            },
+        },
+    }
+
+
+@pytest.fixture(scope='function')
+def run_info_1(run_info):
+    return deepcopy(run_info)
+
+
+@pytest.fixture(scope='function')
+def run_info_2(run_info):
+    return deepcopy(run_info)
+
+
+def test_get_matching_allele_files(run_info):
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info, run_info)
+    assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')]
+
+
+def test_get_matching_allele_files_different_cut_points(run_info_1, run_info_2):
+    run_info_2['results']['refs']['Reference']['sgRNA_cut_points'] = [50, 51]
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == []
+
+
+def test_get_matching_allele_files_different_guides(run_info_1, run_info_2):
+    run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] = ['GGCCCTTAAAC']
+    run_info_2['results']['refs']['Reference']['allele_frequency_files'] = ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAC.txt']
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == []
+
+
+def test_get_matching_allele_files_multiple_alleles(run_info_1, run_info_2):
+    run_info_1['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference'])
+    run_info_1['results']['refs']['Other_Amplicon']['sequence'] = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAA'
+    run_info_1['results']['refs']['Other_Amplicon']['allele_frequency_files'] = ['Other_Amplicon.Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt']
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')]
+
+
+def test_get_matching_allele_files_different_amplicon_names_same_sequence(run_info_1, run_info_2):
+    run_info_2['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference'])
+    del run_info_2['results']['refs']['Reference']
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')]

From 517671f9b6729c1570bafce871c56b64f92fdfa3 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Fri, 26 Apr 2024 14:20:04 -0600
Subject: [PATCH 12/14] Allow for matching of multiple guides in the same
 amplicon

---
 CRISPResso2/CRISPRessoCompareCORE.py          |  8 +++++---
 .../unit_tests/test_CRISPRessoCompareCORE.py  | 20 +++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
index 55b758e4..d448b1e0 100644
--- a/CRISPResso2/CRISPRessoCompareCORE.py
+++ b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -79,12 +79,14 @@ def get_amplicon_info(run_info):
     matching_allele_files = []
     for sequence_1 in amplicons_1:
         if sequence_1 in amplicons_2:
-            if amplicons_1[sequence_1]['guides'] != amplicons_2[sequence_1]['guides']:
-                warn(f'Report 1 has different guides than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison')
-                continue
             if amplicons_1[sequence_1]['cut_points'] != amplicons_2[sequence_1]['cut_points']:
                 warn(f'Report 1 has different cut points than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison')
                 continue
+            guides_1 = set(amplicons_1[sequence_1]['guides'])
+            guides_2 = set(amplicons_2[sequence_1]['guides'])
+            if not guides_1 & guides_2:
+                warn(f'Report 1 has no shared guides with report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison')
+                continue
             matching_allele_files.extend((f_1, f_2) for f_1, f_2 in zip(amplicons_1[sequence_1]['allele_files'], amplicons_2[sequence_1]['allele_files']))
 
     return matching_allele_files
diff --git a/tests/unit_tests/test_CRISPRessoCompareCORE.py b/tests/unit_tests/test_CRISPRessoCompareCORE.py
index 9bbe7e16..6d84019f 100644
--- a/tests/unit_tests/test_CRISPRessoCompareCORE.py
+++ b/tests/unit_tests/test_CRISPRessoCompareCORE.py
@@ -63,3 +63,23 @@ def test_get_matching_allele_files_different_amplicon_names_same_sequence(run_in
     del run_info_2['results']['refs']['Reference']
     matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
     assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')]
+
+
+def test_get_matching_allele_files_some_different_guides(run_info_1, run_info_2):
+    run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA']
+    run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt']
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')]
+
+
+def test_get_matching_allele_files_multiple_guides(run_info_1, run_info_2):
+    run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA']
+    run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt']
+    run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA']
+    run_info_2['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt']
+    matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2)
+    assert matching_allele_files == [
+        ('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'),
+        ('Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt', 'Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'),
+    ]
+

From ebe8276070abba74d3f5a7e6e71dea60f0dffa33 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 1 May 2024 14:45:28 -0600
Subject: [PATCH 13/14] Fix pandas FutureWarning

---
 CRISPResso2/CRISPRessoBatchCORE.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
index de31b142..4c60e2da 100644
--- a/CRISPResso2/CRISPRessoBatchCORE.py
+++ b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -178,7 +178,7 @@ def main():
                        'plot_window_size', 'max_rows_alleles_around_cut_to_plot']
         for int_col in int_columns:
             if int_col in batch_params.columns:
-                batch_params[int_col].fillna(getattr(args, int_col), inplace=True)
+                batch_params.fillna(value={int_col: getattr(args, int_col)}, inplace=True)
                 batch_params[int_col] = batch_params[int_col].astype(int)
 
         # rename column "a" to "amplicon_seq", etc
@@ -620,7 +620,7 @@ def main():
                             debug('Plotting nucleotide percentage quilt for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA))
                             plot(
                                 CRISPRessoPlot.plot_nucleotide_quilt,
-                                nucleotide_quilt_input, 
+                                nucleotide_quilt_input,
                             )
                             plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name)
                             window_nuc_pct_quilt_plot_names.append(plot_name)

From 0f17ad56c2e56e25814dca972b0e3e7481319897 Mon Sep 17 00:00:00 2001
From: Cole Lyman <cole@colelyman.com>
Date: Wed, 1 May 2024 15:42:30 -0600
Subject: [PATCH 14/14] Change test branch back to master

---
 .github/workflows/integration_tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index 02b60904..7d968708 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -46,7 +46,7 @@ jobs:
       with:
         repository: edilytics/CRISPResso2_tests
         token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }}
-        ref: 'cole/add-file-prefix-to-batch' # Use this to specify a branch other than master
+        # ref: '<BRANCH-NAME>' # Use this to specify a branch other than master
 
     - name: Run Basic
       run: |