From 020c6cc5b3778d0e1b291a1b802c3158e186bd97 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Tue, 23 Apr 2024 16:13:50 -0600 Subject: [PATCH 01/14] Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. --- CRISPResso2/CRISPRessoShared.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py index 036d48d9..4af227fe 100644 --- a/CRISPResso2/CRISPRessoShared.py +++ b/CRISPResso2/CRISPRessoShared.py @@ -1856,9 +1856,11 @@ def check_custom_config(args): custom_config['colors'] = config['colors'] return custom_config - except Exception as e: - logger.warn("Cannot read json file '%s', defaulting config parameters." % args.config_file) - print(e) + except Exception: + if args.config_file: + logger.warn("Cannot read config file '%s', defaulting config parameters." % args.config_file) + else: + logger.warn("No config file provided, defaulting config parameters.") return config From 80a82a6d6a004b29d43655b6be89d9c5a90d101a Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 24 Apr 2024 13:01:09 -0600 Subject: [PATCH 02/14] Fix divide by zero when no amplicons are present in Batch mode --- CRISPResso2/CRISPRessoBatchCORE.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 0e198d66..fee6feed 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -398,7 +398,10 @@ def main(): large_plot_cutoff = 300 percent_complete_start, percent_complete_end = 90, 99 - percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons) + if all_amplicons: + percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons) + else: + percent_complete_step = 0 # report for amplicons for amplicon_index, amplicon_seq in enumerate(all_amplicons): # only perform comparison if amplicon seen in more than one sample From b3f8f2da4f583880c241ffd1a47237c88413ab2c Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 24 Apr 2024 13:11:07 -0600 Subject: [PATCH 03/14] Don't append file_prefix to status file name --- CRISPResso2/CRISPRessoAggregateCORE.py | 4 ++-- CRISPResso2/CRISPRessoCORE.py | 2 +- CRISPResso2/CRISPRessoCompareCORE.py | 2 +- CRISPResso2/CRISPRessoMetaCORE.py | 2 +- CRISPResso2/CRISPRessoPooledCORE.py | 2 +- CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 2 +- CRISPResso2/CRISPRessoWGSCORE.py | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index b521dac7..10e36a43 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -71,7 +71,7 @@ def main(): parser.add_argument('--debug', help='Show debug messages', action='store_true') parser.add_argument('-v', '--verbosity', type=int, help='Verbosity level of output to the console (1-4), 4 is the most verbose', default=3) - + # CRISPRessoPro params parser.add_argument('--use_matplotlib', action='store_true', help='Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed') @@ -98,7 +98,7 @@ def main(): log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoAggregate_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoAggregate_status.json')) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index b3dd67c8..92ed7cc7 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug(): with open(log_filename, 'w+') as outfile: outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPResso_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPResso_status.json')) aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL") CRISPRessoShared.check_file(aln_matrix_loc) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 718d4442..9d8041a4 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -142,7 +142,7 @@ def main(): log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoCompare_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoCompare_status.json')) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py index 5fff6ab6..a771e24f 100644 --- a/CRISPResso2/CRISPRessoMetaCORE.py +++ b/CRISPResso2/CRISPRessoMetaCORE.py @@ -233,7 +233,7 @@ def main(): log_filename=_jp('CRISPRessoMeta_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoMeta_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoMeta_status.json')) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py index 124ec705..8f7305c7 100644 --- a/CRISPResso2/CRISPRessoPooledCORE.py +++ b/CRISPResso2/CRISPRessoPooledCORE.py @@ -327,7 +327,7 @@ def main(): log_filename = _jp('CRISPRessoPooled_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooled_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooled_status.json')) if args.zip_output and not args.place_report_in_output_folder: logger.warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. Setting place_report_in_output_folder to True.') diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py index b830e222..bfb22370 100644 --- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py +++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py @@ -231,7 +231,7 @@ def main(): log_filename = _jp('CRISPRessoPooledWGSCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooledWGSCompare_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooledWGSCompare_status.json')) with open(log_filename, 'w+') as outfile: outfile.write( diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py index e8042c01..aea9dbfb 100644 --- a/CRISPResso2/CRISPRessoWGSCORE.py +++ b/CRISPResso2/CRISPRessoWGSCORE.py @@ -347,7 +347,7 @@ def print_stacktrace_if_debug(): except: warn('Folder %s already exists.' % OUTPUT_DIRECTORY) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoWGS_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoWGS_status.json')) info('Checking dependencies...') From 4390b862150658040ce86b4e18c368a584de522f Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 24 Apr 2024 13:53:53 -0600 Subject: [PATCH 04/14] Place status files in output directories --- CRISPResso2/CRISPRessoAggregateCORE.py | 2 +- CRISPResso2/CRISPRessoBatchCORE.py | 2 +- CRISPResso2/CRISPRessoCORE.py | 2 +- CRISPResso2/CRISPRessoCompareCORE.py | 2 +- CRISPResso2/CRISPRessoMetaCORE.py | 2 +- CRISPResso2/CRISPRessoPooledCORE.py | 2 +- CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 2 +- CRISPResso2/CRISPRessoWGSCORE.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index 10e36a43..5cd4cba9 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -98,7 +98,7 @@ def main(): log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoAggregate_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoAggregate_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index fee6feed..0dfb5953 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -127,7 +127,7 @@ def main(): log_filename = _jp('CRISPRessoBatch_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - status_handler = CRISPRessoShared.StatusHandler(_jp('CRISPRessoBatch_status.json')) + status_handler = CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoBatch_status.json')) logger.addHandler(status_handler) with open(log_filename, 'w+') as outfile: diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index 92ed7cc7..a1590604 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug(): with open(log_filename, 'w+') as outfile: outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPResso_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_status.json'))) aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL") CRISPRessoShared.check_file(aln_matrix_loc) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 9d8041a4..a47a05e0 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -142,7 +142,7 @@ def main(): log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoCompare_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoCompare_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\nCRISPRessoCompare %s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py index a771e24f..4afd6d8d 100644 --- a/CRISPResso2/CRISPRessoMetaCORE.py +++ b/CRISPResso2/CRISPRessoMetaCORE.py @@ -233,7 +233,7 @@ def main(): log_filename=_jp('CRISPRessoMeta_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoMeta_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoMeta_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py index 8f7305c7..b2fe7207 100644 --- a/CRISPResso2/CRISPRessoPooledCORE.py +++ b/CRISPResso2/CRISPRessoPooledCORE.py @@ -327,7 +327,7 @@ def main(): log_filename = _jp('CRISPRessoPooled_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooled_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooled_status.json'))) if args.zip_output and not args.place_report_in_output_folder: logger.warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. Setting place_report_in_output_folder to True.') diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py index bfb22370..538afc9f 100644 --- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py +++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py @@ -231,7 +231,7 @@ def main(): log_filename = _jp('CRISPRessoPooledWGSCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoPooledWGSCompare_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooledWGSCompare_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write( diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py index aea9dbfb..ccb27df9 100644 --- a/CRISPResso2/CRISPRessoWGSCORE.py +++ b/CRISPResso2/CRISPRessoWGSCORE.py @@ -347,7 +347,7 @@ def print_stacktrace_if_debug(): except: warn('Folder %s already exists.' % OUTPUT_DIRECTORY) - logger.addHandler(CRISPRessoShared.StatusHandler('CRISPRessoWGS_status.json')) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoWGS_status.json'))) info('Checking dependencies...') From b9daad380e79634e345913a6235ba913d8add22d Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 24 Apr 2024 16:12:47 -0600 Subject: [PATCH 05/14] Update tests branch for file_prefix addition --- .github/workflows/integration_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 7d968708..02b60904 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -46,7 +46,7 @@ jobs: with: repository: edilytics/CRISPResso2_tests token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }} - # ref: '' # Use this to specify a branch other than master + ref: 'cole/add-file-prefix-to-batch' # Use this to specify a branch other than master - name: Run Basic run: | From efe18d915dd9c1638348c37bd54d8b73e0bb8e8c Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 25 Apr 2024 10:34:33 -0600 Subject: [PATCH 06/14] Load D3 and plotly figures with pro with multiple amplicons --- CRISPResso2/CRISPRessoBatchCORE.py | 12 ++++++------ .../CRISPRessoReports/templates/batchReport.html | 12 ++---------- .../templates/shared/partials/fig_summaries.html | 1 + 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 0dfb5953..b9c85099 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -607,7 +607,7 @@ def main(): # and add it to the list sub_sgRNA_intervals.append((newstart, newend)) - this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) + this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) nucleotide_quilt_input = { 'nuc_pct_df': sub_nucleotide_percentage_summary_df, 'mod_pct_df': sub_modification_percentage_summary_df, @@ -620,13 +620,11 @@ def main(): debug('Plotting nucleotide percentage quilt for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA)) plot( CRISPRessoPlot.plot_nucleotide_quilt, - nucleotide_quilt_input, + nucleotide_quilt_input, ) plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name) window_nuc_pct_quilt_plot_names.append(plot_name) crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'sgRNA: ' + sgRNA + ' Amplicon: ' + amplicon_name - if len(consensus_guides) == 1: - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = '' crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] @@ -659,7 +657,7 @@ def main(): # done with per-sgRNA plots if not args.suppress_plots and not args.suppress_batch_summary_plots: # plot the whole region - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, @@ -709,7 +707,7 @@ def main(): else: # guides are not the same if not args.suppress_plots and not args.suppress_batch_summary_plots: - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, @@ -778,6 +776,7 @@ def main(): 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'amplicon_name': amplicon_name, } debug('Plotting allele modification heatmap for {0}'.format(amplicon_name)) plot( @@ -809,6 +808,7 @@ def main(): 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'amplicon_name': amplicon_name, } debug('Plotting allele modification line plot for {0}'.format(amplicon_name)) plot( diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html index fe4a0200..59bd6f5b 100644 --- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -77,11 +77,7 @@
Nucleotide percentages around guides
{% for plot_name in window_nuc_pct_quilts %}
{{report_data['titles'][plot_name]}}
- {% if plot_name in report_data['htmls'] %} - {{ report_data['htmls'][plot_name]|safe }} - {% else %} - {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} - {% endif %} + {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} {% endfor %}
@@ -95,11 +91,7 @@
Nucleotide percentages in the entire amplicon
{% for plot_name in nuc_pct_quilts %}
{{report_data['titles'][plot_name]}}
- {% if plot_name in report_data['htmls'] %} - {{ report_data['htmls'][plot_name]|safe }} - {% else %} - {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} - {% endif %} + {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} {% endfor %}
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html index 63d40dea..d909a0b2 100644 --- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html @@ -12,4 +12,5 @@ {% for (data_label,data_path) in report_data['datas'][plot_name] %}

Data: {{data_label}}

{% endfor %} +
From a49639da816109e1d87a073f9b3206022c30e142 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 25 Apr 2024 14:38:16 -0600 Subject: [PATCH 07/14] Update batch --- CRISPResso2/CRISPRessoBatchCORE.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index b9c85099..e27e93f3 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -607,7 +607,7 @@ def main(): # and add it to the list sub_sgRNA_intervals.append((newstart, newend)) - this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) + this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) nucleotide_quilt_input = { 'nuc_pct_df': sub_nucleotide_percentage_summary_df, 'mod_pct_df': sub_modification_percentage_summary_df, @@ -657,7 +657,7 @@ def main(): # done with per-sgRNA plots if not args.suppress_plots and not args.suppress_batch_summary_plots: # plot the whole region - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, @@ -707,7 +707,7 @@ def main(): else: # guides are not the same if not args.suppress_plots and not args.suppress_batch_summary_plots: - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + '_Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, From 62cf9fc4e224225e7b4d90b47fede4ae493f7d4e Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Thu, 25 Apr 2024 09:18:59 -0600 Subject: [PATCH 08/14] Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix the second run that was compared would not be displayed as a data in the first figure of the report. --- CRISPResso2/CRISPRessoCompareCORE.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index a47a05e0..48b313cb 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -238,7 +238,7 @@ def get_plot_title_with_ref_name(plotTitle, refName): crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Editing efficiency comparison' crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Figure 1: Comparison for amplicon ' + amplicon_name + '; Left: Percentage of modified and unmodified reads in each sample; Right: relative percentage of modified and unmodified reads' output_1 = os.path.join(args.crispresso_output_folder_1, run_info_1['running_info']['report_filename']) - output_2 = os.path.join(args.crispresso_output_folder_1, run_info_2['running_info']['report_filename']) + output_2 = os.path.join(args.crispresso_output_folder_2, run_info_2['running_info']['report_filename']) crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [] if os.path.isfile(output_1): crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name].append((sample_1_name +' output', os.path.relpath(output_1, OUTPUT_DIRECTORY))) From cddcf5953822ef03c8762792291a9c3e8139c388 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Fri, 26 Apr 2024 13:48:25 -0600 Subject: [PATCH 09/14] Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available --- CRISPResso2/CRISPRessoBatchCORE.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index e27e93f3..53211012 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -18,7 +18,7 @@ from CRISPResso2.CRISPRessoReports import CRISPRessoReport if CRISPRessoShared.is_C2Pro_installed(): - from CRISPRessoPro import __version__ as CRISPRessoProVersion + import CRISPRessoPro C2PRO_INSTALLED = True else: C2PRO_INSTALLED = False From c7c0ab2bb5465c0e5317adb52bf1e3dd5f1ddc56 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Fri, 26 Apr 2024 13:49:22 -0600 Subject: [PATCH 10/14] Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import --- CRISPResso2/CRISPRessoCompareCORE.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 48b313cb..1e55c342 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -8,7 +8,6 @@ from copy import deepcopy import sys import traceback -import argparse from CRISPResso2 import CRISPRessoShared from CRISPResso2.CRISPRessoReports import CRISPRessoReport @@ -32,13 +31,6 @@ def check_library(library_name): sys.exit(1) -def get_amplicon_output(amplicon_name, output_folder): - profile_file=os.path.join(output_folder, amplicon_name+'.effect_vector_combined.txt') - if os.path.exists(quantification_file) and profile_file: - return quantification_file, profile_file - else: - raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' % (output_folder, profile_file, amplicon_name)) - def parse_profile(profile_file): return np.loadtxt(profile_file, skiprows=1) From 43974a2aba9c5c699eda3328187b9da43adcbe8d Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Fri, 26 Apr 2024 13:55:13 -0600 Subject: [PATCH 11/14] Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests --- CRISPResso2/CRISPRessoCompareCORE.py | 133 ++++++++++-------- .../unit_tests/test_CRISPRessoCompareCORE.py | 65 +++++++++ 2 files changed, 142 insertions(+), 56 deletions(-) create mode 100644 tests/unit_tests/test_CRISPRessoCompareCORE.py diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 1e55c342..55b758e4 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -63,6 +63,33 @@ def normalize_name(name, output_folder_1, output_folder_2): return name +def get_matching_allele_files(run_info_1, run_info_2): + def get_amplicon_info(run_info): + return { + amplicon['sequence']: { + 'name': amplicon_name, + 'guides': amplicon['sgRNA_orig_sequences'], + 'cut_points': amplicon['sgRNA_cut_points'], + 'allele_files': amplicon['allele_frequency_files'], + } + for amplicon_name, amplicon in run_info['results']['refs'].items() + } + amplicons_1 = get_amplicon_info(run_info_1) + amplicons_2 = get_amplicon_info(run_info_2) + matching_allele_files = [] + for sequence_1 in amplicons_1: + if sequence_1 in amplicons_2: + if amplicons_1[sequence_1]['guides'] != amplicons_2[sequence_1]['guides']: + warn(f'Report 1 has different guides than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') + continue + if amplicons_1[sequence_1]['cut_points'] != amplicons_2[sequence_1]['cut_points']: + warn(f'Report 1 has different cut points than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') + continue + matching_allele_files.extend((f_1, f_2) for f_1, f_2 in zip(amplicons_1[sequence_1]['allele_files'], amplicons_2[sequence_1]['allele_files'])) + + return matching_allele_files + + def main(): try: description = ['~~~CRISPRessoCompare~~~', '-Comparison of two CRISPResso analyses-'] @@ -342,62 +369,56 @@ def get_plot_title_with_ref_name(plotTitle, refName): #create merged heatmaps for each cut site - allele_files_1 = amplicon_info_1[amplicon_name]['allele_files'] - allele_files_2 = amplicon_info_2[amplicon_name]['allele_files'] - for allele_file_1 in allele_files_1: - allele_file_1_name = os.path.split(allele_file_1)[1] #get file part of path - for allele_file_2 in allele_files_2: - allele_file_2_name = os.path.split(allele_file_2)[1] #get file part of path - #if files are the same (same amplicon, cut site, guide), run comparison - if allele_file_1_name == allele_file_2_name: - df1 = pd.read_csv(allele_file_1, sep="\t") - df2 = pd.read_csv(allele_file_2, sep="\t") - - #find unmodified reference for comparison (if it exists) - ref_seq_around_cut = "" - if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0: - ref_seq_around_cut = df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] - #otherwise figure out which sgRNA was used for this comparison - elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0: - ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] - else: - seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0] - for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points): - sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]] - if sgRNA_seq in allele_file_1_name: - this_sgRNA_seq = sgRNA_seq - this_cut_point = cut_point - ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)] - break - - merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, '_'+sample_2_name), how='outer') - quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name] - merged[quant_cols] = merged[quant_cols].fillna(0) - lfc_error =0.1 - merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0) - merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False) - merged = merged.reset_index(drop=True).set_index('Aligned_Sequence') - output_root = allele_file_1_name.replace(".txt", "") - allele_comparison_file = _jp(output_root+'.txt') - merged.to_csv(allele_comparison_file, sep="\t", index=None) - - plot_name = '3.'+output_root+'_top' - CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name), - MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) - crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name - crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ - 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_1_name+'.' - crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] - - plot_name = '3.'+output_root+'_bottom' - CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name), - MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) - crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name - crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ - 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.' - crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] + matching_allele_files = get_matching_allele_files(run_info_1, run_info_2) + for allele_file_1, allele_file_2 in matching_allele_files: + df1 = pd.read_csv(os.path.join(args.crispresso_output_folder_1, allele_file_1), sep="\t") + df2 = pd.read_csv(os.path.join(args.crispresso_output_folder_2, allele_file_2), sep="\t") + + #find unmodified reference for comparison (if it exists) + ref_seq_around_cut = "" + if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0: + ref_seq_around_cut = df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] + #otherwise figure out which sgRNA was used for this comparison + elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0: + ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] + else: + seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0] + for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points): + sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]] + if sgRNA_seq in allele_file_1: + this_sgRNA_seq = sgRNA_seq + this_cut_point = cut_point + ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)] + break + + merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, '_'+sample_2_name), how='outer') + quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name] + merged[quant_cols] = merged[quant_cols].fillna(0) + lfc_error =0.1 + merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0) + merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False) + merged = merged.reset_index(drop=True).set_index('Aligned_Sequence') + args.crispresso_output_folder_root = os.path.split(allele_file_1)[1].replace(".txt", "") + allele_comparison_file = _jp(args.crispresso_output_folder_root+'.txt') + merged.to_csv(allele_comparison_file, sep="\t", index=None) + + plot_name = '3.'+args.crispresso_output_folder_root+'_top' + CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name), + MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) + crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) + crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name + crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ + 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_1_name+'.' + crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] + + plot_name = '3.'+args.crispresso_output_folder_root+'_bottom' + CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name), + MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) + crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) + crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name + crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ + 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.' + crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] debug('Calculating significant base counts...', {'percent_complete': 95}) sig_counts_filename = _jp('CRISPRessoCompare_significant_base_counts.txt') diff --git a/tests/unit_tests/test_CRISPRessoCompareCORE.py b/tests/unit_tests/test_CRISPRessoCompareCORE.py new file mode 100644 index 00000000..9bbe7e16 --- /dev/null +++ b/tests/unit_tests/test_CRISPRessoCompareCORE.py @@ -0,0 +1,65 @@ +"""Unit tests for CRISPRessoCompareCORE.""" + +from CRISPResso2 import CRISPRessoCompareCORE + +from copy import deepcopy +import pytest + + +@pytest.fixture(scope='function') +def run_info(): + return { + 'results': { + 'refs': { + 'Reference': { + 'sequence':'CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGG', + 'sgRNA_orig_sequences': ['GGCCCTTAAAA'], + 'sgRNA_cut_points': [50], + 'allele_frequency_files': ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'], + }, + }, + }, + } + + +@pytest.fixture(scope='function') +def run_info_1(run_info): + return deepcopy(run_info) + + +@pytest.fixture(scope='function') +def run_info_2(run_info): + return deepcopy(run_info) + + +def test_get_matching_allele_files(run_info): + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info, run_info) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_different_cut_points(run_info_1, run_info_2): + run_info_2['results']['refs']['Reference']['sgRNA_cut_points'] = [50, 51] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [] + + +def test_get_matching_allele_files_different_guides(run_info_1, run_info_2): + run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] = ['GGCCCTTAAAC'] + run_info_2['results']['refs']['Reference']['allele_frequency_files'] = ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAC.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [] + + +def test_get_matching_allele_files_multiple_alleles(run_info_1, run_info_2): + run_info_1['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference']) + run_info_1['results']['refs']['Other_Amplicon']['sequence'] = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAA' + run_info_1['results']['refs']['Other_Amplicon']['allele_frequency_files'] = ['Other_Amplicon.Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_different_amplicon_names_same_sequence(run_info_1, run_info_2): + run_info_2['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference']) + del run_info_2['results']['refs']['Reference'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] From 517671f9b6729c1570bafce871c56b64f92fdfa3 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Fri, 26 Apr 2024 14:20:04 -0600 Subject: [PATCH 12/14] Allow for matching of multiple guides in the same amplicon --- CRISPResso2/CRISPRessoCompareCORE.py | 8 +++++--- .../unit_tests/test_CRISPRessoCompareCORE.py | 20 +++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 55b758e4..d448b1e0 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -79,12 +79,14 @@ def get_amplicon_info(run_info): matching_allele_files = [] for sequence_1 in amplicons_1: if sequence_1 in amplicons_2: - if amplicons_1[sequence_1]['guides'] != amplicons_2[sequence_1]['guides']: - warn(f'Report 1 has different guides than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') - continue if amplicons_1[sequence_1]['cut_points'] != amplicons_2[sequence_1]['cut_points']: warn(f'Report 1 has different cut points than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') continue + guides_1 = set(amplicons_1[sequence_1]['guides']) + guides_2 = set(amplicons_2[sequence_1]['guides']) + if not guides_1 & guides_2: + warn(f'Report 1 has no shared guides with report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') + continue matching_allele_files.extend((f_1, f_2) for f_1, f_2 in zip(amplicons_1[sequence_1]['allele_files'], amplicons_2[sequence_1]['allele_files'])) return matching_allele_files diff --git a/tests/unit_tests/test_CRISPRessoCompareCORE.py b/tests/unit_tests/test_CRISPRessoCompareCORE.py index 9bbe7e16..6d84019f 100644 --- a/tests/unit_tests/test_CRISPRessoCompareCORE.py +++ b/tests/unit_tests/test_CRISPRessoCompareCORE.py @@ -63,3 +63,23 @@ def test_get_matching_allele_files_different_amplicon_names_same_sequence(run_in del run_info_2['results']['refs']['Reference'] matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_some_different_guides(run_info_1, run_info_2): + run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_multiple_guides(run_info_1, run_info_2): + run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_2['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [ + ('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'), + ('Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt', 'Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'), + ] + From ebe8276070abba74d3f5a7e6e71dea60f0dffa33 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 1 May 2024 14:45:28 -0600 Subject: [PATCH 13/14] Fix pandas FutureWarning --- CRISPResso2/CRISPRessoBatchCORE.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index de31b142..4c60e2da 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -178,7 +178,7 @@ def main(): 'plot_window_size', 'max_rows_alleles_around_cut_to_plot'] for int_col in int_columns: if int_col in batch_params.columns: - batch_params[int_col].fillna(getattr(args, int_col), inplace=True) + batch_params.fillna(value={int_col: getattr(args, int_col)}, inplace=True) batch_params[int_col] = batch_params[int_col].astype(int) # rename column "a" to "amplicon_seq", etc @@ -620,7 +620,7 @@ def main(): debug('Plotting nucleotide percentage quilt for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA)) plot( CRISPRessoPlot.plot_nucleotide_quilt, - nucleotide_quilt_input, + nucleotide_quilt_input, ) plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name) window_nuc_pct_quilt_plot_names.append(plot_name) From 0f17ad56c2e56e25814dca972b0e3e7481319897 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 1 May 2024 15:42:30 -0600 Subject: [PATCH 14/14] Change test branch back to master --- .github/workflows/integration_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 02b60904..7d968708 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -46,7 +46,7 @@ jobs: with: repository: edilytics/CRISPResso2_tests token: ${{ secrets.ACCESS_CRISPRESSO2_TESTS }} - ref: 'cole/add-file-prefix-to-batch' # Use this to specify a branch other than master + # ref: '' # Use this to specify a branch other than master - name: Run Basic run: |