Skip to content

Commit

Permalink
Reduce memory usage for allele plots (pinellolab#478)
Browse files Browse the repository at this point in the history
* Create new function to plot memory reduced alleles table plot

* Round percentage complete in CLI and add initial 0% complete (#100)

---------

Co-authored-by: Kendell Clement <k.clement.dev@gmail.com>
  • Loading branch information
2 people authored and mbowcut2 committed Oct 14, 2024
1 parent 288291e commit ed3fec4
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 4 deletions.
19 changes: 15 additions & 4 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -4476,13 +4476,24 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
new_sel_cols_start = cut_point - plot_half_window
for (int_start, int_end) in refs[ref_name]['sgRNA_intervals']:
new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1, int_end - new_sel_cols_start - 1)]


prepped_df_alleles, annotations, y_labels, insertion_dict, per_element_annot_kws, is_reference = CRISPRessoPlot.prep_alleles_table(
df_to_plot,
ref_seq_around_cut,
args.max_rows_alleles_around_cut_to_plot,
args.min_frequency_alleles_around_cut_to_plot,
)
plot_9_input = {
'reference_seq': ref_seq_around_cut,
'df_alleles': df_to_plot,
'prepped_df_alleles': prepped_df_alleles,
'annotations': annotations,
'y_labels': y_labels,
'insertion_dict': insertion_dict,
'per_element_annot_kws': per_element_annot_kws,
'is_reference': is_reference,
'fig_filename_root': fig_filename_root,
'custom_colors': custom_config["colors"],
'MIN_FREQUENCY': args.min_frequency_alleles_around_cut_to_plot,
'MAX_N_ROWS': args.max_rows_alleles_around_cut_to_plot,
'SAVE_ALSO_PNG': save_png,
'plot_cut_point': plot_cut_point,
'sgRNA_intervals': new_sgRNA_intervals,
Expand All @@ -4491,7 +4502,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
'annotate_wildtype_allele': args.annotate_wildtype_allele,
}
debug('Plotting allele distribution around cut for {0}'.format(ref_name))
plot(CRISPRessoPlot.plot_alleles_table, plot_9_input)
plot(CRISPRessoPlot.plot_alleles_table_prepped, plot_9_input)
crispresso2_info['results']['refs'][ref_name]['plot_9_roots'].append(os.path.basename(fig_filename_root))
crispresso2_info['results']['refs'][ref_name]['plot_9_captions'].append("Figure 9: Visualization of the distribution of identified alleles around the cleavage site for the " + sgRNA_legend + ". Nucleotides are indicated by unique colors (A = green; C = red; G = yellow; T = purple). Substitutions are shown in bold font. Red rectangles highlight inserted sequences. Horizontal dashed lines indicate deleted sequences. The vertical dashed line indicates the predicted cleavage site.")
crispresso2_info['results']['refs'][ref_name]['plot_9_datas'].append([('Allele frequency table', os.path.basename(allele_filename))])
Expand Down
88 changes: 88 additions & 0 deletions CRISPResso2/CRISPRessoPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2965,6 +2965,7 @@ def plot_amino_acid_heatmap(
fig.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,))
plt.close(fig)


def prep_alleles_table(df_alleles, reference_seq, MAX_N_ROWS, MIN_FREQUENCY):
"""
Prepares a df of alleles for Plotting
Expand Down Expand Up @@ -3398,6 +3399,93 @@ def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,i
plt.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,), pad_inches=0.1)
plt.close()


def plot_alleles_table_prepped(
    reference_seq,
    prepped_df_alleles,
    annotations,
    y_labels,
    insertion_dict,
    per_element_annot_kws,
    is_reference,
    fig_filename_root,
    custom_colors,
    SAVE_ALSO_PNG=False,
    plot_cut_point=True,
    cut_point_ind=None,
    sgRNA_intervals=None,
    sgRNA_names=None,
    sgRNA_mismatches=None,
    annotate_wildtype_allele='****',
    **kwargs,
):
    """Plot an allele table for a pre-filtered dataframe with allele frequencies.

    The wildtype annotation is appended to a copy of `y_labels`; the list
    passed in by the caller is left unmodified, so the same prepared inputs
    can be plotted more than once without the annotation accumulating.

    Parameters
    ----------
    reference_seq : str
        The reference amplicon sequence to plot.
    prepped_df_alleles : pd.DataFrame
        Merged dataframe (should include columns '#Reads', '%Reads'), from `CRISPRessoPlot.prep_alleles_table`.
    annotations : list
        List of annotations for each allele, from `CRISPRessoPlot.prep_alleles_table`.
    y_labels : list
        List of labels for each row/allele, from `CRISPRessoPlot.prep_alleles_table`.
    insertion_dict : dict
        Locations of insertions -- red squares will be drawn around these, from `CRISPRessoPlot.prep_alleles_table`.
    per_element_annot_kws : list
        Annotations for each cell (e.g. bold for substitutions, etc.), from `CRISPRessoPlot.prep_alleles_table`.
    is_reference : list
        List of booleans for whether the read is equal to the reference, from `CRISPRessoPlot.prep_alleles_table`.
    fig_filename_root : str
        Figure filename to plot (not including '.pdf' or '.png').
    custom_colors : dict
        Dict of colors to plot (e.g. colors['A'] = (1,0,0,0.4) # red,blue,green,alpha ).
    SAVE_ALSO_PNG : bool
        Whether to write png file as well.
    plot_cut_point : bool
        If False, won't draw 'predicted cleavage' line.
    cut_point_ind : int
        Index of cut point (if None, will be plotted in the middle calculated as len(reference_seq)/2).
    sgRNA_intervals : list
        Locations where sgRNAs are located.
    sgRNA_names : list
        Names of sgRNAs (otherwise empty).
    sgRNA_mismatches : list
        Array (for each sgRNA_interval) of locations in sgRNA where there are mismatches.
    annotate_wildtype_allele : str
        String to add to the end of the wildtype allele (e.g. '****' or '').
    kwargs : dict
        Additional keyword arguments. They are accepted but not used by this function.

    Returns
    -------
    None
    """
    # Annotate a shallow copy: mutating the caller's list in place would append
    # the wildtype marker again on every subsequent call with the same inputs.
    row_labels = list(y_labels)
    if annotate_wildtype_allele != '':
        for ix, is_ref in enumerate(is_reference):
            if is_ref:
                row_labels[ix] += annotate_wildtype_allele

    plot_alleles_heatmap(
        reference_seq=reference_seq,
        fig_filename_root=fig_filename_root,
        X=prepped_df_alleles,
        annot=annotations,
        y_labels=row_labels,
        insertion_dict=insertion_dict,
        per_element_annot_kws=per_element_annot_kws,
        custom_colors=custom_colors,
        SAVE_ALSO_PNG=SAVE_ALSO_PNG,
        plot_cut_point=plot_cut_point,
        cut_point_ind=cut_point_ind,
        sgRNA_intervals=sgRNA_intervals,
        sgRNA_names=sgRNA_names,
        sgRNA_mismatches=sgRNA_mismatches,
    )



def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='****',**kwargs):
"""
plots an allele table for a dataframe with allele frequencies
Expand Down

0 comments on commit ed3fec4

Please sign in to comment.