Skip to content

Commit

Permalink
Cole/fix status file name (#69) (pinellolab#430)
Browse files Browse the repository at this point in the history
* Update config file logging messages

This removes printing the exception (which is essentially a duplicate),
and adds a condition for when no config file was provided. Also changes `json`
to `config` so that it is clearer.

* Fix divide by zero when no amplicons are present in Batch mode

* Don't append file_prefix to status file name

* Place status files in output directories

* Update tests branch for file_prefix addition

* Load D3 and plotly figures with CRISPRessoPro when there are multiple amplicons

* Update batch

* Fix bug in CRISPRessoCompare when pointing to report data with file_prefix

Before this fix, when using a file_prefix, the second run that was compared
would not be displayed as a data entry in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

When installed via conda, the version is not available

* Remove `get_amplicon_output` unused function from CRISPRessoCompare

Also remove unused argparse import

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests

* Allow for matching of multiple guides in the same amplicon

* Fix pandas FutureWarning

* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>
  • Loading branch information
Colelyman and Snicker7 authored May 1, 2024
1 parent 3ec22e5 commit 38fd76d
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 95 deletions.
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoAggregateCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def main():

parser.add_argument('--debug', help='Show debug messages', action='store_true')
parser.add_argument('-v', '--verbosity', type=int, help='Verbosity level of output to the console (1-4), 4 is the most verbose', default=3)

# CRISPRessoPro params
parser.add_argument('--use_matplotlib', action='store_true',
help='Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed')
Expand All @@ -98,7 +98,7 @@ def main():

log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt')
logger.addHandler(logging.FileHandler(log_filename))
logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoAggregate_status.json')))
logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoAggregate_status.json')))

with open(log_filename, 'w+') as outfile:
outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))
Expand Down
21 changes: 12 additions & 9 deletions CRISPResso2/CRISPRessoBatchCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from CRISPResso2.CRISPRessoReports import CRISPRessoReport

if CRISPRessoShared.is_C2Pro_installed():
from CRISPRessoPro import __version__ as CRISPRessoProVersion
import CRISPRessoPro
C2PRO_INSTALLED = True
else:
C2PRO_INSTALLED = False
Expand Down Expand Up @@ -127,7 +127,7 @@ def main():

log_filename = _jp('CRISPRessoBatch_RUNNING_LOG.txt')
logger.addHandler(logging.FileHandler(log_filename))
status_handler = CRISPRessoShared.StatusHandler(_jp('CRISPRessoBatch_status.json'))
status_handler = CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoBatch_status.json'))
logger.addHandler(status_handler)

with open(log_filename, 'w+') as outfile:
Expand Down Expand Up @@ -178,7 +178,7 @@ def main():
'plot_window_size', 'max_rows_alleles_around_cut_to_plot']
for int_col in int_columns:
if int_col in batch_params.columns:
batch_params[int_col].fillna(getattr(args, int_col), inplace=True)
batch_params.fillna(value={int_col: getattr(args, int_col)}, inplace=True)
batch_params[int_col] = batch_params[int_col].astype(int)

# rename column "a" to "amplicon_seq", etc
Expand Down Expand Up @@ -398,7 +398,10 @@ def main():
large_plot_cutoff = 300

percent_complete_start, percent_complete_end = 90, 99
percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
if all_amplicons:
percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
else:
percent_complete_step = 0
# report for amplicons
for amplicon_index, amplicon_seq in enumerate(all_amplicons):
# only perform comparison if amplicon seen in more than one sample
Expand Down Expand Up @@ -604,7 +607,7 @@ def main():
# and add it to the list
sub_sgRNA_intervals.append((newstart, newend))

this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
nucleotide_quilt_input = {
'nuc_pct_df': sub_nucleotide_percentage_summary_df,
'mod_pct_df': sub_modification_percentage_summary_df,
Expand All @@ -622,8 +625,6 @@ def main():
plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name)
window_nuc_pct_quilt_plot_names.append(plot_name)
crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'sgRNA: ' + sgRNA + ' Amplicon: ' + amplicon_name
if len(consensus_guides) == 1:
crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = ''
crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name
crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]

Expand Down Expand Up @@ -656,7 +657,7 @@ def main():
# done with per-sgRNA plots

if not args.suppress_plots and not args.suppress_batch_summary_plots: # plot the whole region
this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
nucleotide_quilt_input = {
'nuc_pct_df': nucleotide_percentage_summary_df,
'mod_pct_df': modification_percentage_summary_df,
Expand Down Expand Up @@ -706,7 +707,7 @@ def main():

else: # guides are not the same
if not args.suppress_plots and not args.suppress_batch_summary_plots:
this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
nucleotide_quilt_input = {
'nuc_pct_df': nucleotide_percentage_summary_df,
'mod_pct_df': modification_percentage_summary_df,
Expand Down Expand Up @@ -775,6 +776,7 @@ def main():
'sample_sgRNA_intervals': sgRNA_intervals,
'plot_path': plot_path,
'title': modification_type,
'amplicon_name': amplicon_name,
}
debug('Plotting allele modification heatmap for {0}'.format(amplicon_name))
plot(
Expand Down Expand Up @@ -806,6 +808,7 @@ def main():
'sample_sgRNA_intervals': sgRNA_intervals,
'plot_path': plot_path,
'title': modification_type,
'amplicon_name': amplicon_name,
}
debug('Plotting allele modification line plot for {0}'.format(amplicon_name))
plot(
Expand Down
2 changes: 1 addition & 1 deletion CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug():
with open(log_filename, 'w+') as outfile:
outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write))

logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPResso_status.json')))
logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_status.json')))

aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL")
CRISPRessoShared.check_file(aln_matrix_loc)
Expand Down
Loading

0 comments on commit 38fd76d

Please sign in to comment.