From 38fd76dbd7ce2087468f9f454b548777de959a68 Mon Sep 17 00:00:00 2001 From: Cole Lyman Date: Wed, 1 May 2024 16:42:28 -0600 Subject: [PATCH] Cole/fix status file name (#69) (#430) * Update config file logging messages This removes printing the exception (which is essentially a duplicate), and adds a condition if no config file was provided. Also changes `json` to `config` so that it is more clear. * Fix divide by zero when no amplicons are present in Batch mode * Don't append file_prefix to status file name * Place status files in output directories * Update tests branch for file_prefix addition * Load D3 and plotly figures with pro with multiple amplicons * Update batch * Fix bug in CRISPRessoCompare with pointing to report datas with file_prefix Before this fix, when using a file_prefix, the report of the second run being compared was not listed among the data links of the first figure of the report. * Import CRISPRessoPro instead of importing the version When installed via conda, the version is not available * Remove `get_amplicon_output` unused function from CRISPRessoCompare Also remove unused argparse import * Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests * Allow for matching of multiple guides in the same amplicon * Fix pandas FutureWarning * Change test branch back to master --------- Co-authored-by: Sam --- CRISPResso2/CRISPRessoAggregateCORE.py | 4 +- CRISPResso2/CRISPRessoBatchCORE.py | 21 +-- CRISPResso2/CRISPRessoCORE.py | 2 +- CRISPResso2/CRISPRessoCompareCORE.py | 147 ++++++++++-------- CRISPResso2/CRISPRessoMetaCORE.py | 2 +- CRISPResso2/CRISPRessoPooledCORE.py | 2 +- CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 2 +- .../templates/batchReport.html | 12 +- .../shared/partials/fig_summaries.html | 1 + CRISPResso2/CRISPRessoShared.py | 8 +- CRISPResso2/CRISPRessoWGSCORE.py | 2 +- .../unit_tests/test_CRISPRessoCompareCORE.py | 85 ++++++++++ 12 files changed, 193 insertions(+), 95 deletions(-) create mode 100644 
tests/unit_tests/test_CRISPRessoCompareCORE.py diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index b521dac7..5cd4cba9 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -71,7 +71,7 @@ def main(): parser.add_argument('--debug', help='Show debug messages', action='store_true') parser.add_argument('-v', '--verbosity', type=int, help='Verbosity level of output to the console (1-4), 4 is the most verbose', default=3) - + # CRISPRessoPro params parser.add_argument('--use_matplotlib', action='store_true', help='Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed') @@ -98,7 +98,7 @@ def main(): log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoAggregate_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoAggregate_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 22a8f65e..4c60e2da 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -18,7 +18,7 @@ from CRISPResso2.CRISPRessoReports import CRISPRessoReport if CRISPRessoShared.is_C2Pro_installed(): - from CRISPRessoPro import __version__ as CRISPRessoProVersion + import CRISPRessoPro C2PRO_INSTALLED = True else: C2PRO_INSTALLED = False @@ -127,7 +127,7 @@ def main(): log_filename = _jp('CRISPRessoBatch_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - status_handler = CRISPRessoShared.StatusHandler(_jp('CRISPRessoBatch_status.json')) + status_handler = CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoBatch_status.json')) logger.addHandler(status_handler) with 
open(log_filename, 'w+') as outfile: @@ -178,7 +178,7 @@ def main(): 'plot_window_size', 'max_rows_alleles_around_cut_to_plot'] for int_col in int_columns: if int_col in batch_params.columns: - batch_params[int_col].fillna(getattr(args, int_col), inplace=True) + batch_params.fillna(value={int_col: getattr(args, int_col)}, inplace=True) batch_params[int_col] = batch_params[int_col].astype(int) # rename column "a" to "amplicon_seq", etc @@ -398,7 +398,10 @@ def main(): large_plot_cutoff = 300 percent_complete_start, percent_complete_end = 90, 99 - percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons) + if all_amplicons: + percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons) + else: + percent_complete_step = 0 # report for amplicons for amplicon_index, amplicon_seq in enumerate(all_amplicons): # only perform comparison if amplicon seen in more than one sample @@ -604,7 +607,7 @@ def main(): # and add it to the list sub_sgRNA_intervals.append((newstart, newend)) - this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) + this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA) nucleotide_quilt_input = { 'nuc_pct_df': sub_nucleotide_percentage_summary_df, 'mod_pct_df': sub_modification_percentage_summary_df, @@ -622,8 +625,6 @@ def main(): plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name) window_nuc_pct_quilt_plot_names.append(plot_name) crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'sgRNA: ' + sgRNA + ' Amplicon: ' + amplicon_name - if len(consensus_guides) == 1: - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = '' crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + 
amplicon_name crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))] @@ -656,7 +657,7 @@ def main(): # done with per-sgRNA plots if not args.suppress_plots and not args.suppress_batch_summary_plots: # plot the whole region - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, @@ -706,7 +707,7 @@ def main(): else: # guides are not the same if not args.suppress_plots and not args.suppress_batch_summary_plots: - this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt') + this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt') nucleotide_quilt_input = { 'nuc_pct_df': nucleotide_percentage_summary_df, 'mod_pct_df': modification_percentage_summary_df, @@ -775,6 +776,7 @@ def main(): 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'amplicon_name': amplicon_name, } debug('Plotting allele modification heatmap for {0}'.format(amplicon_name)) plot( @@ -806,6 +808,7 @@ def main(): 'sample_sgRNA_intervals': sgRNA_intervals, 'plot_path': plot_path, 'title': modification_type, + 'amplicon_name': amplicon_name, } debug('Plotting allele modification line plot for {0}'.format(amplicon_name)) plot( diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index b3dd67c8..a1590604 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug(): with open(log_filename, 'w+') as outfile: outfile.write('CRISPResso version 
%s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPResso_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_status.json'))) aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL") CRISPRessoShared.check_file(aln_matrix_loc) diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 718d4442..d448b1e0 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -8,7 +8,6 @@ from copy import deepcopy import sys import traceback -import argparse from CRISPResso2 import CRISPRessoShared from CRISPResso2.CRISPRessoReports import CRISPRessoReport @@ -32,13 +31,6 @@ def check_library(library_name): sys.exit(1) -def get_amplicon_output(amplicon_name, output_folder): - profile_file=os.path.join(output_folder, amplicon_name+'.effect_vector_combined.txt') - if os.path.exists(quantification_file) and profile_file: - return quantification_file, profile_file - else: - raise CRISPRessoShared.OutputFolderIncompleteException('The folder %s is not a valid CRISPResso2 output folder. Cannot find profile file %s for amplicon %s.' 
% (output_folder, profile_file, amplicon_name)) - def parse_profile(profile_file): return np.loadtxt(profile_file, skiprows=1) @@ -71,6 +63,35 @@ def normalize_name(name, output_folder_1, output_folder_2): return name +def get_matching_allele_files(run_info_1, run_info_2): + def get_amplicon_info(run_info): + return { + amplicon['sequence']: { + 'name': amplicon_name, + 'guides': amplicon['sgRNA_orig_sequences'], + 'cut_points': amplicon['sgRNA_cut_points'], + 'allele_files': amplicon['allele_frequency_files'], + } + for amplicon_name, amplicon in run_info['results']['refs'].items() + } + amplicons_1 = get_amplicon_info(run_info_1) + amplicons_2 = get_amplicon_info(run_info_2) + matching_allele_files = [] + for sequence_1 in amplicons_1: + if sequence_1 in amplicons_2: + if amplicons_1[sequence_1]['cut_points'] != amplicons_2[sequence_1]['cut_points']: + warn(f'Report 1 has different cut points than report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') + continue + guides_1 = set(amplicons_1[sequence_1]['guides']) + guides_2 = set(amplicons_2[sequence_1]['guides']) + if not guides_1 & guides_2: + warn(f'Report 1 has no shared guides with report 2 for amplicon {amplicons_1[sequence_1]["name"]}, skipping comparison') + continue + matching_allele_files.extend((f_1, f_2) for f_1, f_2 in zip(amplicons_1[sequence_1]['allele_files'], amplicons_2[sequence_1]['allele_files'])) + + return matching_allele_files + + def main(): try: description = ['~~~CRISPRessoCompare~~~', '-Comparison of two CRISPResso analyses-'] @@ -142,7 +163,7 @@ def main(): log_filename = _jp('CRISPRessoCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoCompare_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoCompare_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\nCRISPRessoCompare 
%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) @@ -238,7 +259,7 @@ def get_plot_title_with_ref_name(plotTitle, refName): crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Editing efficiency comparison' crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Figure 1: Comparison for amplicon ' + amplicon_name + '; Left: Percentage of modified and unmodified reads in each sample; Right: relative percentage of modified and unmodified reads' output_1 = os.path.join(args.crispresso_output_folder_1, run_info_1['running_info']['report_filename']) - output_2 = os.path.join(args.crispresso_output_folder_1, run_info_2['running_info']['report_filename']) + output_2 = os.path.join(args.crispresso_output_folder_2, run_info_2['running_info']['report_filename']) crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [] if os.path.isfile(output_1): crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name].append((sample_1_name +' output', os.path.relpath(output_1, OUTPUT_DIRECTORY))) @@ -350,62 +371,56 @@ def get_plot_title_with_ref_name(plotTitle, refName): #create merged heatmaps for each cut site - allele_files_1 = amplicon_info_1[amplicon_name]['allele_files'] - allele_files_2 = amplicon_info_2[amplicon_name]['allele_files'] - for allele_file_1 in allele_files_1: - allele_file_1_name = os.path.split(allele_file_1)[1] #get file part of path - for allele_file_2 in allele_files_2: - allele_file_2_name = os.path.split(allele_file_2)[1] #get file part of path - #if files are the same (same amplicon, cut site, guide), run comparison - if allele_file_1_name == allele_file_2_name: - df1 = pd.read_csv(allele_file_1, sep="\t") - df2 = pd.read_csv(allele_file_2, sep="\t") - - #find unmodified reference for comparison (if it exists) - ref_seq_around_cut = "" - if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0: - ref_seq_around_cut = 
df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] - #otherwise figure out which sgRNA was used for this comparison - elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0: - ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] - else: - seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0] - for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points): - sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]] - if sgRNA_seq in allele_file_1_name: - this_sgRNA_seq = sgRNA_seq - this_cut_point = cut_point - ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)] - break - - merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, '_'+sample_2_name), how='outer') - quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name] - merged[quant_cols] = merged[quant_cols].fillna(0) - lfc_error =0.1 - merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0) - merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False) - merged = merged.reset_index(drop=True).set_index('Aligned_Sequence') - output_root = allele_file_1_name.replace(".txt", "") - allele_comparison_file = _jp(output_root+'.txt') - merged.to_csv(allele_comparison_file, sep="\t", index=None) - - plot_name = '3.'+output_root+'_top' - CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name), - 
MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) - crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name - crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ - 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_1_name+'.' - crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] - - plot_name = '3.'+output_root+'_bottom' - CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name), - MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) - crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) - crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name - crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. 
Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ - 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.' - crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] + matching_allele_files = get_matching_allele_files(run_info_1, run_info_2) + for allele_file_1, allele_file_2 in matching_allele_files: + df1 = pd.read_csv(os.path.join(args.crispresso_output_folder_1, allele_file_1), sep="\t") + df2 = pd.read_csv(os.path.join(args.crispresso_output_folder_2, allele_file_2), sep="\t") + + #find unmodified reference for comparison (if it exists) + ref_seq_around_cut = "" + if len(df1.loc[df1['Reference_Sequence'].str.contains('-')==False]) > 0: + ref_seq_around_cut = df1.loc[df1['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] + #otherwise figure out which sgRNA was used for this comparison + elif len(df2.loc[df2['Reference_Sequence'].str.contains('-')==False]) > 0: + ref_seq_around_cut = df2.loc[df2['Reference_Sequence'].str.contains('-')==False]['Reference_Sequence'].iloc[0] + else: + seq_len = df2[df2['Unedited']==True]['Reference_Sequence'].iloc[0] + for sgRNA_interval, cut_point in zip(sgRNA_intervals, cut_points): + sgRNA_seq = consensus_sequence[sgRNA_interval[0]:sgRNA_interval[1]] + if sgRNA_seq in allele_file_1: + this_sgRNA_seq = sgRNA_seq + this_cut_point = cut_point + ref_seq_around_cut=consensus_sequence[max(0, this_cut_point-args.offset_around_cut_to_plot+1):min(seq_len, cut_point+args.offset_around_cut_to_plot+1)] + break + + merged = pd.merge(df1, df2, on = ['Aligned_Sequence', 'Reference_Sequence', 'Unedited', 'n_deleted', 'n_inserted', 'n_mutated'], suffixes=('_' + sample_1_name, 
'_'+sample_2_name), how='outer') + quant_cols = ['#Reads_'+sample_1_name, '%Reads_'+sample_1_name, '#Reads_'+sample_2_name, '%Reads_'+sample_2_name] + merged[quant_cols] = merged[quant_cols].fillna(0) + lfc_error =0.1 + merged['each_LFC'] = np.log2(((merged['%Reads_'+sample_1_name]+lfc_error)/(merged['%Reads_'+sample_2_name]+lfc_error)).astype(float)).replace([np.inf, np.NaN], 0) + merged = merged.sort_values(['%Reads_'+sample_1_name, 'Reference_Sequence', 'n_deleted', 'n_inserted', 'n_mutated'], ascending=False) + merged = merged.reset_index(drop=True).set_index('Aligned_Sequence') + args.crispresso_output_folder_root = os.path.split(allele_file_1)[1].replace(".txt", "") + allele_comparison_file = _jp(args.crispresso_output_folder_root+'.txt') + merged.to_csv(allele_comparison_file, sep="\t", index=None) + + plot_name = '3.'+args.crispresso_output_folder_root+'_top' + CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=True), sample_1_name, sample_2_name, _jp(plot_name), + MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) + crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) + crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_1_name + crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ + 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. 
Alleles are sorted for enrichment in ' + sample_1_name+'.' + crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] + + plot_name = '3.'+args.crispresso_output_folder_root+'_bottom' + CRISPRessoPlot.plot_alleles_table_compare(ref_seq_around_cut, merged.sort_values(['each_LFC'], ascending=False), sample_1_name, sample_2_name, _jp(plot_name), + MIN_FREQUENCY=args.min_frequency_alleles_around_cut_to_plot, MAX_N_ROWS=args.max_rows_alleles_around_cut_to_plot, SAVE_ALSO_PNG=save_png) + crispresso2_info['results']['general_plots']['summary_plot_names'].append(plot_name) + crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'Alleles enriched in ' + sample_2_name + crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Distribution comparison of alleles. Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site. '+ \ + 'The proportion and number of reads is shown for each sample on the right, with the values for ' + sample_1_name + ' followed by the values for ' + sample_2_name +'. Alleles are sorted for enrichment in ' + sample_2_name+'.' 
+ crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Allele comparison table', os.path.basename(allele_comparison_file))] debug('Calculating significant base counts...', {'percent_complete': 95}) sig_counts_filename = _jp('CRISPRessoCompare_significant_base_counts.txt') diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py index 5fff6ab6..4afd6d8d 100644 --- a/CRISPResso2/CRISPRessoMetaCORE.py +++ b/CRISPResso2/CRISPRessoMetaCORE.py @@ -233,7 +233,7 @@ def main(): log_filename=_jp('CRISPRessoMeta_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoMeta_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoMeta_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv)) diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py index 4ccd9d70..95bb179b 100644 --- a/CRISPResso2/CRISPRessoPooledCORE.py +++ b/CRISPResso2/CRISPRessoPooledCORE.py @@ -327,7 +327,7 @@ def main(): log_filename = _jp('CRISPRessoPooled_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooled_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooled_status.json'))) if args.zip_output and not args.place_report_in_output_folder: logger.warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. 
Setting place_report_in_output_folder to True.') diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py index b830e222..538afc9f 100644 --- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py +++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py @@ -231,7 +231,7 @@ def main(): log_filename = _jp('CRISPRessoPooledWGSCompare_RUNNING_LOG.txt') logger.addHandler(logging.FileHandler(log_filename)) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoPooledWGSCompare_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoPooledWGSCompare_status.json'))) with open(log_filename, 'w+') as outfile: outfile.write( diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html index fe4a0200..59bd6f5b 100644 --- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -77,11 +77,7 @@
Nucleotide percentages around guides
{% for plot_name in window_nuc_pct_quilts %}
{{report_data['titles'][plot_name]}}
- {% if plot_name in report_data['htmls'] %} - {{ report_data['htmls'][plot_name]|safe }} - {% else %} - {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} - {% endif %} + {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} {% endfor %}
@@ -95,11 +91,7 @@
Nucleotide percentages in the entire amplicon
{% for plot_name in nuc_pct_quilts %}
{{report_data['titles'][plot_name]}}
- {% if plot_name in report_data['htmls'] %} - {{ report_data['htmls'][plot_name]|safe }} - {% else %} - {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} - {% endif %} + {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} {% endfor %}
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html index 63d40dea..d909a0b2 100644 --- a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html @@ -12,4 +12,5 @@ {% for (data_label,data_path) in report_data['datas'][plot_name] %}

Data: {{data_label}}

{% endfor %} +
diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py index 6c0755ae..98957a49 100644 --- a/CRISPResso2/CRISPRessoShared.py +++ b/CRISPResso2/CRISPRessoShared.py @@ -1856,9 +1856,11 @@ def check_custom_config(args): custom_config['colors'] = config['colors'] return custom_config - except Exception as e: - logger.warn("Cannot read json file '%s', defaulting config parameters." % args.config_file) - print(e) + except Exception: + if args.config_file: + logger.warn("Cannot read config file '%s', defaulting config parameters." % args.config_file) + else: + logger.warn("No config file provided, defaulting config parameters.") return config diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py index 8c01368e..a50b1668 100644 --- a/CRISPResso2/CRISPRessoWGSCORE.py +++ b/CRISPResso2/CRISPRessoWGSCORE.py @@ -347,7 +347,7 @@ def print_stacktrace_if_debug(): except: warn('Folder %s already exists.' % OUTPUT_DIRECTORY) - logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoWGS_status.json'))) + logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoWGS_status.json'))) info('Checking dependencies...') diff --git a/tests/unit_tests/test_CRISPRessoCompareCORE.py b/tests/unit_tests/test_CRISPRessoCompareCORE.py new file mode 100644 index 00000000..6d84019f --- /dev/null +++ b/tests/unit_tests/test_CRISPRessoCompareCORE.py @@ -0,0 +1,85 @@ +"""Unit tests for CRISPRessoCompareCORE.""" + +from CRISPResso2 import CRISPRessoCompareCORE + +from copy import deepcopy +import pytest + + +@pytest.fixture(scope='function') +def run_info(): + return { + 'results': { + 'refs': { + 'Reference': { + 'sequence':'CGGATGTTCCAATCAGTACGCAGAGAGTCGCCGTCTCCAAGGTGAAAGCGGAAGTAGGGCCTTCGCGCACCTCATGGAATCCCTTCTGCAGCACCTGGATCGCTTTTCCGAGCTTCTGGCGGTCTCAAGCACTACCTACGTCAGCACCTGGGACCCCGCCACCGTGCGCCGGGCCTTGCAGTGGGCGCGCTACCTGCGCCACATCCATCGGCGCTTTGGTCGG', + 'sgRNA_orig_sequences': ['GGCCCTTAAAA'], + 'sgRNA_cut_points': 
[50], + 'allele_frequency_files': ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'], + }, + }, + }, + } + + +@pytest.fixture(scope='function') +def run_info_1(run_info): + return deepcopy(run_info) + + +@pytest.fixture(scope='function') +def run_info_2(run_info): + return deepcopy(run_info) + + +def test_get_matching_allele_files(run_info): + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info, run_info) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_different_cut_points(run_info_1, run_info_2): + run_info_2['results']['refs']['Reference']['sgRNA_cut_points'] = [50, 51] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [] + + +def test_get_matching_allele_files_different_guides(run_info_1, run_info_2): + run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] = ['GGCCCTTAAAC'] + run_info_2['results']['refs']['Reference']['allele_frequency_files'] = ['Alleles_frequency_table_around_sgRNA_GGCCCTTAAAC.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [] + + +def test_get_matching_allele_files_multiple_alleles(run_info_1, run_info_2): + run_info_1['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference']) + run_info_1['results']['refs']['Other_Amplicon']['sequence'] = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAA' + run_info_1['results']['refs']['Other_Amplicon']['allele_frequency_files'] = ['Other_Amplicon.Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 
'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_different_amplicon_names_same_sequence(run_info_1, run_info_2): + run_info_2['results']['refs']['Other_Amplicon'] = deepcopy(run_info_1['results']['refs']['Reference']) + del run_info_2['results']['refs']['Reference'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_some_different_guides(run_info_1, run_info_2): + run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt')] + + +def test_get_matching_allele_files_multiple_guides(run_info_1, run_info_2): + run_info_1['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_1['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + run_info_2['results']['refs']['Reference']['sgRNA_orig_sequences'] += ['AAAAAAAAAAAAAAAAAAA'] + run_info_2['results']['refs']['Reference']['allele_frequency_files'] += ['Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'] + matching_allele_files = CRISPRessoCompareCORE.get_matching_allele_files(run_info_1, run_info_2) + assert matching_allele_files == [ + ('Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt', 'Alleles_frequency_table_around_sgRNA_GGCCCTTAAAA.txt'), + ('Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt', 
'Alleles_frequency_table_around_sgRNA_AAAAAAAAAAAAAAAAAAA.txt'), + ] +