Cole/fix status file name (#69) (pinellolab#430)

* Update config file logging messages

  This removes printing the exception (which is essentially a duplicate), and adds a condition for when no config file was provided. Also changes `json` to `config` so that it is clearer.

* Fix divide by zero when no amplicons are present in Batch mode
* Don't append file_prefix to status file name
* Place status files in output directories
* Update tests branch for file_prefix addition
* Load D3 and plotly figures with Pro when there are multiple amplicons
* Update batch
* Fix bug in CRISPRessoCompare with pointing to report data files with file_prefix

  Before this fix, when using a file_prefix, the second run that was compared would not be displayed as a data file in the first figure of the report.

* Import CRISPRessoPro instead of importing the version

  When installed via conda, the version is not available.

* Remove unused `get_amplicon_output` function from CRISPRessoCompare

  Also remove unused argparse import.

* Implement `get_matching_allele_files` in CRISPRessoCompare and accompanying unit tests
* Allow for matching of multiple guides in the same amplicon
* Fix pandas FutureWarning
* Change test branch back to master

---------

Co-authored-by: Sam <snic9004@gmail.com>
edilytics · May 1, 2024 · 38fd76d · 38fd76d
1 parent 3ec22e5
commit 38fd76d
Show file tree

Hide file tree

Showing 12 changed files with 193 additions and 95 deletions.
diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -71,7 +71,7 @@ def main():
 
         parser.add_argument('--debug', help='Show debug messages', action='store_true')
         parser.add_argument('-v', '--verbosity', type=int, help='Verbosity level of output to the console (1-4), 4 is the most verbose', default=3)
-        
+
         # CRISPRessoPro params
         parser.add_argument('--use_matplotlib', action='store_true',
                         help='Use matplotlib for plotting instead of plotly/d3 when CRISPRessoPro is installed')
@@ -98,7 +98,7 @@ def main():
 
         log_filename=_jp('CRISPRessoAggregate_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPRessoAggregate_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoAggregate_status.json')))
 
         with open(log_filename, 'w+') as outfile:
               outfile.write('[Command used]:\n%s\n\n[Execution log]:\n' % ' '.join(sys.argv))

diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -18,7 +18,7 @@
 from CRISPResso2.CRISPRessoReports import CRISPRessoReport
 
 if CRISPRessoShared.is_C2Pro_installed():
-    from CRISPRessoPro import __version__ as CRISPRessoProVersion
+    import CRISPRessoPro
     C2PRO_INSTALLED = True
 else:
     C2PRO_INSTALLED = False
@@ -127,7 +127,7 @@ def main():
 
         log_filename = _jp('CRISPRessoBatch_RUNNING_LOG.txt')
         logger.addHandler(logging.FileHandler(log_filename))
-        status_handler = CRISPRessoShared.StatusHandler(_jp('CRISPRessoBatch_status.json'))
+        status_handler = CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPRessoBatch_status.json'))
         logger.addHandler(status_handler)
 
         with open(log_filename, 'w+') as outfile:
@@ -178,7 +178,7 @@ def main():
                        'plot_window_size', 'max_rows_alleles_around_cut_to_plot']
         for int_col in int_columns:
             if int_col in batch_params.columns:
-                batch_params[int_col].fillna(getattr(args, int_col), inplace=True)
+                batch_params.fillna(value={int_col: getattr(args, int_col)}, inplace=True)
                 batch_params[int_col] = batch_params[int_col].astype(int)
 
         # rename column "a" to "amplicon_seq", etc
@@ -398,7 +398,10 @@ def main():
         large_plot_cutoff = 300
 
         percent_complete_start, percent_complete_end = 90, 99
-        percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
+        if all_amplicons:
+            percent_complete_step = (percent_complete_end - percent_complete_start) / len(all_amplicons)
+        else:
+            percent_complete_step = 0
         # report for amplicons
         for amplicon_index, amplicon_seq in enumerate(all_amplicons):
             # only perform comparison if amplicon seen in more than one sample
@@ -604,7 +607,7 @@ def main():
                                 # and add it to the list
                                 sub_sgRNA_intervals.append((newstart, newend))
 
-                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
+                            this_window_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt_around_sgRNA_'+sgRNA)
                             nucleotide_quilt_input = {
                                 'nuc_pct_df': sub_nucleotide_percentage_summary_df,
                                 'mod_pct_df': sub_modification_percentage_summary_df,
@@ -622,8 +625,6 @@ def main():
                             plot_name = os.path.basename(this_window_nuc_pct_quilt_plot_name)
                             window_nuc_pct_quilt_plot_names.append(plot_name)
                             crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = 'sgRNA: ' + sgRNA + ' Amplicon: ' + amplicon_name
-                            if len(consensus_guides) == 1:
-                                crispresso2_info['results']['general_plots']['summary_plot_titles'][plot_name] = ''
                             crispresso2_info['results']['general_plots']['summary_plot_labels'][plot_name] = 'Composition of each base around the guide ' + sgRNA + ' for the amplicon ' + amplicon_name
                             crispresso2_info['results']['general_plots']['summary_plot_datas'][plot_name] = [('Nucleotide frequencies', os.path.basename(nucleotide_frequency_summary_filename)), ('Modification frequencies', os.path.basename(modification_frequency_summary_filename))]
 
@@ -656,7 +657,7 @@ def main():
                         # done with per-sgRNA plots
 
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:  # plot the whole region
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,
@@ -706,7 +707,7 @@ def main():
 
                 else:  # guides are not the same
                     if not args.suppress_plots and not args.suppress_batch_summary_plots:
-                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name + 'Nucleotide_percentage_quilt')
+                        this_nuc_pct_quilt_plot_name = _jp(amplicon_plot_name.replace('.', '') + 'Nucleotide_percentage_quilt')
                         nucleotide_quilt_input = {
                             'nuc_pct_df': nucleotide_percentage_summary_df,
                             'mod_pct_df': modification_percentage_summary_df,
@@ -775,6 +776,7 @@ def main():
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
+                            'amplicon_name': amplicon_name,
                         }
                         debug('Plotting allele modification heatmap for {0}'.format(amplicon_name))
                         plot(
@@ -806,6 +808,7 @@ def main():
                             'sample_sgRNA_intervals': sgRNA_intervals,
                             'plot_path': plot_path,
                             'title': modification_type,
+                            'amplicon_name': amplicon_name,
                         }
                         debug('Plotting allele modification line plot for {0}'.format(amplicon_name))
                         plot(

diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
@@ -1288,7 +1288,7 @@ def print_stacktrace_if_debug():
             with open(log_filename, 'w+') as outfile:
                 outfile.write('CRISPResso version %s\n[Command used]:\n%s\n\n[Execution log]:\n' %(CRISPRessoShared.__version__, crispresso_cmd_to_write))
 
-        logger.addHandler(CRISPRessoShared.StatusHandler(_jp('CRISPResso_status.json')))
+        logger.addHandler(CRISPRessoShared.StatusHandler(os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_status.json')))
 
         aln_matrix_loc = os.path.join(_ROOT, "EDNAFULL")
         CRISPRessoShared.check_file(aln_matrix_loc)