Skip to content

Commit

Permalink
Move get_n_reads_fastq to CRISPRessoShared
Browse files Browse the repository at this point in the history
  • Loading branch information
Colelyman committed Aug 28, 2024
1 parent 9b66ea4 commit 3eba615
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 18 deletions.
7 changes: 2 additions & 5 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,6 @@ def get_avg_read_length_fastq(fastq_filename):
p = sb.Popen(cmd, shell=True, stdout=sb.PIPE)
return int(p.communicate()[0].strip())

def get_n_reads_fastq(fastq_filename):
p = sb.Popen('gunzip -c' if fastq_filename.endswith('.gz') else 'cat' + ' < "%s" | wc -l' % fastq_filename, shell=True, stdout=sb.PIPE)
return round(float(p.communicate()[0]) / 4.0)

def get_n_reads_bam(bam_filename,bam_chr_loc=""):
cmd = "samtools view -c " + bam_filename + " " + bam_chr_loc
Expand Down Expand Up @@ -2458,7 +2455,7 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited

N_READS_INPUT = 0
if args.fastq_r1:
N_READS_INPUT = get_n_reads_fastq(args.fastq_r1)
N_READS_INPUT = CRISPRessoShared.get_n_reads_fastq(args.fastq_r1)
elif args.bam_input:
N_READS_INPUT = get_n_reads_bam(args.bam_input, args.bam_chr_loc)

Expand Down Expand Up @@ -2620,7 +2617,7 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
if args.bam_input:
N_READS_AFTER_PREPROCESSING = N_READS_INPUT
else:
N_READS_AFTER_PREPROCESSING=get_n_reads_fastq(processed_output_filename)
N_READS_AFTER_PREPROCESSING = CRISPRessoShared.get_n_reads_fastq(processed_output_filename)
if N_READS_AFTER_PREPROCESSING == 0:
raise CRISPRessoShared.NoReadsAfterQualityFilteringException('No reads in input or no reads survived the average or single bp quality filtering.')

Expand Down
12 changes: 3 additions & 9 deletions CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,6 @@ def get_read_length_from_cigar(cigar_string):
result += int(length)
return result

def get_n_reads_fastq(fastq_filename):
p = sb.Popen(('z' if fastq_filename.endswith('.gz') else '' ) +"cat < %s | wc -l" % fastq_filename, shell=True, stdout=sb.PIPE)
n_reads = int(float(p.communicate()[0])/4.0)
return n_reads

def get_n_reads_bam(bam_filename):
p = sb.Popen("samtools view -c %s" % bam_filename, shell=True, stdout=sb.PIPE)
Expand Down Expand Up @@ -611,10 +607,10 @@ def main():
N_READS_INPUT = get_n_reads_bam(args.aligned_pooled_bam)
N_READS_AFTER_PREPROCESSING = N_READS_INPUT
else:
N_READS_INPUT = get_n_reads_fastq(args.fastq_r1)
N_READS_INPUT = CRISPRessoShared.get_n_reads_fastq(args.fastq_r1)
if args.split_interleaved_input:
N_READS_INPUT /= 2
N_READS_AFTER_PREPROCESSING = get_n_reads_fastq(processed_output_filename)
N_READS_AFTER_PREPROCESSING = CRISPRessoShared.get_n_reads_fastq(processed_output_filename)

crispresso2_info['running_info']['finished_steps']['count_input_reads'] = (N_READS_INPUT, N_READS_AFTER_PREPROCESSING)
CRISPRessoShared.write_crispresso_info(
Expand Down Expand Up @@ -852,7 +848,7 @@ def main():
n_reads_aligned_amplicons=[]
crispresso_cmds = []
for idx, row in df_template.iterrows():
this_n_reads = get_n_reads_fastq(row['Demultiplexed_fastq.gz_filename'])
this_n_reads = CRISPRessoShared.get_n_reads_fastq(row['Demultiplexed_fastq.gz_filename'])
n_reads_aligned_amplicons.append(this_n_reads)
info('\n Processing:%s with %d reads'%(idx,this_n_reads))
this_amp_seq = row['amplicon_seq']
Expand Down Expand Up @@ -1552,8 +1548,6 @@ def rreplace(s, old, new):

#if many reads weren't aligned, print those out for the user
if RUNNING_MODE != 'ONLY_GENOME':
#N_READS_INPUT=get_n_reads_fastq(args.fastq_r1)
#N_READS_AFTER_PREPROCESSING=get_n_reads_fastq(processed_output_filename)
tot_reads_aligned = df_summary_quantification['Reads_aligned'].fillna(0).sum()
tot_reads = df_summary_quantification['Reads_total'].sum()

Expand Down
6 changes: 6 additions & 0 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,12 @@ def get_command_output(command):
break


def get_n_reads_fastq(fastq_filename):
"""Count the number of reads from fastq_filename."""
p = sb.Popen('gunzip -c' if fastq_filename.endswith('.gz') else 'cat' + ' < "%s" | wc -l' % fastq_filename, shell=True, stdout=sb.PIPE)
return round(float(p.communicate()[0]) / 4.0)


def get_most_frequent_reads(fastq_r1, fastq_r2, number_of_reads_to_consider, fastp_command, min_paired_end_reads_overlap, split_interleaved_input=False, debug=False):
"""
Get the most frequent amplicon from a fastq file (or after merging a r1 and r2 fastq file).
Expand Down
4 changes: 0 additions & 4 deletions CRISPResso2/CRISPRessoWGSCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,6 @@ def write_trimmed_fastq(in_bam_filename, bpstart, bpend, out_fastq_filename):
pd=check_library('pandas')
np=check_library('numpy')

def get_n_reads_fastq(fastq_filename):
p = sb.Popen(('z' if fastq_filename.endswith('.gz') else '' ) +"cat < %s | wc -l" % fastq_filename, shell=True, stdout=sb.PIPE)
n_reads = int(float(p.communicate()[0])/4.0)
return n_reads

def extract_reads(row):
if row.sequence:
Expand Down

0 comments on commit 3eba615

Please sign in to comment.