Skip to content

Commit

Permalink
All sub-crispresso processes are run with 1 process
Browse files Browse the repository at this point in the history
  • Loading branch information
kclem committed Oct 20, 2021
1 parent 75205b0 commit a923a7c
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 50 deletions.
13 changes: 9 additions & 4 deletions CRISPResso2/CRISPRessoBatchCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,14 @@ def main():

crispresso2_info['running_info']['log_filename'] = os.path.basename(log_filename)

#this is potentially the square-root of the input n_processes because sub-CRISPResso commands will take some processes as well
orig_n_processes = args.n_processes
args.n_processes = CRISPRessoShared.get_sub_n_processes(suppress_plots=args.suppress_plots, suppress_report=args.suppress_report, n_processes=args.n_processes)
n_processes_for_batch = 1
if args.n_processes == "max":
n_processes_for_batch = CRISPRessoMultiProcessing.get_max_processes()
else:
n_processes_for_batch = int(args.n_processes)

#this value will be propagated to sub-commands, so set it as 1 here
args.n_processes = 1

crispresso_cmd_to_write = ' '.join(sys.argv)
if args.write_cleaned_report:
Expand Down Expand Up @@ -279,7 +284,7 @@ def main():
crispresso2_info['results']['batch_names_arr'] = batch_names_arr
crispresso2_info['results']['batch_input_names'] = batch_input_names

CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, args.n_processes, 'batch', args.skip_failed)
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_batch, 'batch', args.skip_failed)

run_datas = [] #crispresso2 info from each row

Expand Down
12 changes: 6 additions & 6 deletions CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,15 +350,15 @@ def main():
if args.alternate_alleles:
CRISPRessoShared.check_file(args.alternate_alleles)

#for computation performed in CRISPressoPooled (e.g. bowtie alignment, etc) use n_processes_for_pooled
# for computation performed in CRISPRessoPooled (e.g. bowtie alignment, etc) use n_processes_for_pooled
n_processes_for_pooled = 1
if args.n_processes == "max":
n_processes_for_pooled = CRISPRessoMultiProcessing.get_max_processes()
else:
n_processes_for_pooled = int(args.n_processes)

#here, we set args.n_processes as another value because this value is propagated to sub-CRISPResso runs (not for usage in CRISPRessoPooled)
args.n_processes = CRISPRessoShared.get_sub_n_processes(suppress_plots=args.suppress_plots, suppress_report=args.suppress_report, n_processes=args.n_processes)
# here, we set args.n_processes as 1 because this value is propagated to sub-CRISPResso runs (not for usage in CRISPRessoPooled)
args.n_processes = 1

####TRIMMING AND MERGING
get_name_from_fasta=lambda x: os.path.basename(x).replace('.fastq', '').replace('.gz', '').replace('.fq', '')
Expand Down Expand Up @@ -762,7 +762,7 @@ def main():
else:
warn('Skipping amplicon [%s] because no reads align to it\n'% idx)

CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, args.n_processes, 'amplicon', args.skip_failed)
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_pooled, 'amplicon', args.skip_failed)

df_template['n_reads']=n_reads_aligned_amplicons
df_template['n_reads_aligned_%']=df_template['n_reads']/float(N_READS_ALIGNED)*100
Expand Down Expand Up @@ -1086,7 +1086,7 @@ def main():
n_reads_aligned_genome.append(0)
warn("The amplicon %s doesn't have any reads mapped to it!\n Please check your amplicon sequence." % idx)

CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, args.n_processes, 'amplicon', args.skip_failed)
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_pooled, 'amplicon', args.skip_failed)

crispresso2_info['running_info']['finished_steps']['crispresso_amplicons_and_genome'] = (n_reads_aligned_genome, fastq_region_filenames, files_to_match)
CRISPRessoShared.write_crispresso_info(
Expand Down Expand Up @@ -1195,7 +1195,7 @@ def main():
crispresso_cmds.append(crispresso_cmd)
else:
info('Skipping region: %s-%d-%d , not enough reads (%d)' %(row.chr_id, row.bpstart, row.bpend, row.n_reads))
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, args.n_processes, 'region', args.skip_failed)
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_pooled, 'region', args.skip_failed)

crispresso2_info['running_info']['finished_steps']['crispresso_genome_only'] = True
CRISPRessoShared.write_crispresso_info(
Expand Down
39 changes: 3 additions & 36 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from CRISPResso2.CRISPRessoMultiProcessing import get_max_processes


__version__ = "2.2.5"
__version__ = "2.2.6"

###EXCEPTIONS############################
class FlashException(Exception):
Expand Down Expand Up @@ -64,6 +64,8 @@ class AutoException(Exception):
class OutputFolderIncompleteException(Exception):
pass

class InstallationException(Exception):
pass

#########################################

Expand Down Expand Up @@ -251,41 +253,6 @@ def propagate_crispresso_options(cmd, options, params):
cmd+=' --%s %s' % (option, str(val))
return cmd


def get_sub_n_processes(suppress_plots=False, suppress_report=False, n_processes=1):
    """Return how many processes each sub-CRISPResso command should use.

    Wrapper tools (CRISPRessoBatch, CRISPRessoPooled, ...) launch one
    CRISPResso run per sample/region. This picks the per-run process count
    from the overall budget given in ``n_processes``.

    Parameters
    ----------
    suppress_plots : bool
        Whether plots are suppressed in the output (default: False)
    suppress_report : bool
        Whether the report is suppressed in the output (default: False)
    n_processes : str of a number (e.g. 1,2,3,4) or 'max'
        The total number of processes requested (default: 1)

    Returns
    -------
    int
        1 when either plots or the report are suppressed; otherwise
        floor(sqrt(n_processes)), never less than 1.
    """
    # Normalise first so ints and numeric strings are handled the same way.
    requested = str(n_processes)

    # Either suppression flag short-circuits to a single process.
    if suppress_plots or suppress_report:
        return 1

    # 'max' defers to the module-level get_max_processes helper.
    total = get_max_processes() if requested == 'max' else int(requested)

    # The parent tool also parallelises across sub-commands, so each
    # sub-command only receives the square root of the total budget.
    per_command = floor(sqrt(total))
    return per_command if per_command > 1 else 1


#######
# Sequence functions
#######
Expand Down
8 changes: 4 additions & 4 deletions CRISPResso2/CRISPRessoWGSCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,15 +309,15 @@ def print_stacktrace_if_debug():
if args.gene_annotations:
check_file(args.gene_annotations)

# for computation performed in CRISPressoPooled (e.g. bowtie alignment, etc) use n_processes_for_wgs
# for computation performed in CRISPRessoWGS (e.g. bowtie alignment, etc) use n_processes_for_wgs
n_processes_for_wgs = 1
if args.n_processes == "max":
n_processes_for_wgs = CRISPRessoMultiProcessing.get_max_processes()
else:
n_processes_for_wgs = int(args.n_processes)

# here, we set args.n_processes as another value because this value is propagated to sub-CRISPResso runs (not for usage in CRISPRessoWGS)
args.n_processes = CRISPRessoShared.get_sub_n_processes(suppress_plots=args.suppress_plots, suppress_report=args.suppress_report, n_processes=args.n_processes)
# here, we set args.n_processes as 1 because this value is propagated to sub-CRISPResso runs (not for usage in CRISPRessoWGS)
args.n_processes = 1

#INIT
get_name_from_bam=lambda x: os.path.basename(x).replace('.bam', '')
Expand Down Expand Up @@ -592,7 +592,7 @@ def set_filenames(row):
else:
info('\nThe region [%s] has too few reads mapped to it (%d)! Not running CRISPResso!' % (idx, row['n_reads']))

CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, args.n_processes, 'region', args.skip_failed)
CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_wgs, 'region', args.skip_failed)

quantification_summary=[]
all_region_names = []
Expand Down

0 comments on commit a923a7c

Please sign in to comment.