diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index 06bf5aae..0699c109 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -4476,13 +4476,24 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                     new_sel_cols_start = cut_point - plot_half_window
                     for (int_start, int_end) in refs[ref_name]['sgRNA_intervals']:
                         new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1, int_end - new_sel_cols_start - 1)]
+
+
+                    prepped_df_alleles, annotations, y_labels, insertion_dict, per_element_annot_kws, is_reference = CRISPRessoPlot.prep_alleles_table(
+                        df_to_plot,
+                        ref_seq_around_cut,
+                        args.max_rows_alleles_around_cut_to_plot,
+                        args.min_frequency_alleles_around_cut_to_plot,
+                    )
                     plot_9_input = {
                         'reference_seq': ref_seq_around_cut,
-                        'df_alleles': df_to_plot,
+                        'prepped_df_alleles': prepped_df_alleles,
+                        'annotations': annotations,
+                        'y_labels': y_labels,
+                        'insertion_dict': insertion_dict,
+                        'per_element_annot_kws': per_element_annot_kws,
+                        'is_reference': is_reference,
                         'fig_filename_root': fig_filename_root,
                         'custom_colors': custom_config["colors"],
-                        'MIN_FREQUENCY': args.min_frequency_alleles_around_cut_to_plot,
-                        'MAX_N_ROWS': args.max_rows_alleles_around_cut_to_plot,
                         'SAVE_ALSO_PNG': save_png,
                         'plot_cut_point': plot_cut_point,
                         'sgRNA_intervals': new_sgRNA_intervals,
@@ -4491,7 +4502,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
                         'annotate_wildtype_allele': args.annotate_wildtype_allele,
                     }
                     debug('Plotting allele distribution around cut for {0}'.format(ref_name))
-                    plot(CRISPRessoPlot.plot_alleles_table, plot_9_input)
+                    plot(CRISPRessoPlot.plot_alleles_table_prepped, plot_9_input)
                     crispresso2_info['results']['refs'][ref_name]['plot_9_roots'].append(os.path.basename(fig_filename_root))
                     crispresso2_info['results']['refs'][ref_name]['plot_9_captions'].append("Figure 9: Visualization of the distribution of identified alleles around the cleavage site for the " + sgRNA_legend + ". Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site.")
                     crispresso2_info['results']['refs'][ref_name]['plot_9_datas'].append([('Allele frequency table', os.path.basename(allele_filename))])
diff --git a/CRISPResso2/CRISPRessoPlot.py b/CRISPResso2/CRISPRessoPlot.py
index 5c68de04..ac2f50cc 100644
--- a/CRISPResso2/CRISPRessoPlot.py
+++ b/CRISPResso2/CRISPRessoPlot.py
@@ -2965,6 +2965,7 @@ def plot_amino_acid_heatmap(
         fig.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,))
     plt.close(fig)
 
+
 def prep_alleles_table(df_alleles, reference_seq, MAX_N_ROWS, MIN_FREQUENCY):
     """
     Prepares a df of alleles for Plotting
@@ -3398,6 +3399,95 @@ def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,i
         plt.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,), pad_inches=0.1)
     plt.close()
 
+
+def plot_alleles_table_prepped(
+    reference_seq,
+    prepped_df_alleles,
+    annotations,
+    y_labels,
+    insertion_dict,
+    per_element_annot_kws,
+    is_reference,
+    fig_filename_root,
+    custom_colors,
+    SAVE_ALSO_PNG=False,
+    plot_cut_point=True,
+    cut_point_ind=None,
+    sgRNA_intervals=None,
+    sgRNA_names=None,
+    sgRNA_mismatches=None,
+    annotate_wildtype_allele='****',
+    **kwargs,
+):
+    """Plot an allele table for a pre-filtered dataframe with allele frequencies.
+
+    Parameters
+    ----------
+    reference_seq : str
+        The reference amplicon sequence to plot.
+    prepped_df_alleles : pd.DataFrame
+        Merged dataframe (should include columns '#Reads', '%Reads'), from `CRISPRessoPlot.prep_alleles_table`.
+    annotations : list
+        List of annotations for each allele, from `CRISPRessoPlot.prep_alleles_table`.
+    y_labels : list
+        List of labels for each row/allele, from `CRISPRessoPlot.prep_alleles_table` (not modified; a copy is annotated).
+    insertion_dict : dict
+        Locations of insertions -- red squares will be drawn around these, from `CRISPRessoPlot.prep_alleles_table`.
+    per_element_annot_kws : list
+        Annotations for each cell (e.g. bold for substitutions, etc.), from `CRISPRessoPlot.prep_alleles_table`.
+    is_reference : list
+        List of booleans for whether the read is equal to the reference, from `CRISPRessoPlot.prep_alleles_table`.
+    fig_filename_root : str
+        Figure filename to plot (not including '.pdf' or '.png').
+    custom_colors : dict
+        Dict of colors to plot (e.g. colors['A'] = (1,0,0,0.4) # red,blue,green,alpha ).
+    SAVE_ALSO_PNG : bool
+        Whether to write png file as well.
+    plot_cut_point : bool
+        If False, won't draw 'predicted cleavage' line.
+    cut_point_ind : int
+        Index of cut point (if None, will be plotted in the middle calculated as len(reference_seq)/2).
+    sgRNA_intervals : list
+        Locations where sgRNAs are located.
+    sgRNA_names : list
+        Names of sgRNAs (otherwise empty).
+    sgRNA_mismatches : list
+        Array (for each sgRNA_interval) of locations in sgRNA where there are mismatches.
+    annotate_wildtype_allele : str
+        String to add to the end of the wildtype allele (e.g. '****' or '').
+    kwargs : dict
+        Additional keyword arguments.
+
+    Returns
+    -------
+    None
+    """
+    if annotate_wildtype_allele != '':
+        # Copy first: the caller's list must not accumulate suffixes if it is reused.
+        y_labels = list(y_labels)
+        for ix, is_ref in enumerate(is_reference):
+            if is_ref:
+                y_labels[ix] += annotate_wildtype_allele
+
+    plot_alleles_heatmap(
+        reference_seq=reference_seq,
+        fig_filename_root=fig_filename_root,
+        X=prepped_df_alleles,
+        annot=annotations,
+        y_labels=y_labels,
+        insertion_dict=insertion_dict,
+        per_element_annot_kws=per_element_annot_kws,
+        custom_colors=custom_colors,
+        SAVE_ALSO_PNG=SAVE_ALSO_PNG,
+        plot_cut_point=plot_cut_point,
+        cut_point_ind=cut_point_ind,
+        sgRNA_intervals=sgRNA_intervals,
+        sgRNA_names=sgRNA_names,
+        sgRNA_mismatches=sgRNA_mismatches,
+    )
+
+
+
 def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='****',**kwargs):
     """
     plots an allele table for a dataframe with allele frequencies