Skip to content

Commit

Permalink
Merge branch 'master' into wgs-whole-region-deleted-v2.3.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
Colelyman committed Dec 6, 2024
2 parents 1f175dd + 0232b06 commit 7dbd5eb
Show file tree
Hide file tree
Showing 14 changed files with 30,096 additions and 21,891 deletions.
48,654 changes: 28,071 additions & 20,583 deletions CRISPResso2/CRISPResso2Align.c

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions CRISPResso2/CRISPResso2Align.pyx
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,9 +17,7 @@ cdef extern from "stdlib.h":
cdef extern from "Python.h":
ctypedef void PyObject

ctypedef np.int_t DTYPE_INT
ctypedef np.uint_t DTYPE_UINT
ctypedef np.int8_t DTYPE_BOOL
ctypedef long DTYPE_LONG

cdef size_t UP = 1, LEFT = 2, DIAG = 3, NONE = 4
cdef size_t MARRAY = 1, IARRAY = 2, JARRAY = 3
Expand All @@ -38,7 +36,7 @@ def read_matrix(path):
The score for a 'C' changing to an 'A' is stored in the matrix as:
mat[ord('C'), ord('A')] = score
"""
cdef np.ndarray[DTYPE_INT, ndim=2] a
cdef np.ndarray[DTYPE_LONG, ndim=2] a
cdef size_t ai = 0, i
cdef int v, mat_size

Expand All @@ -50,7 +48,7 @@ def read_matrix(path):
headers = [ord(x) for x in line.split(' ') if x]
mat_size = max(headers) + 1

a = np.zeros((mat_size, mat_size), dtype=int)
a = np.zeros((mat_size, mat_size), dtype=long)

line = fh.readline()
while line:
Expand All @@ -72,7 +70,7 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s
n_mismatch_score: score for matching a nucleotide with 'N'
n_match_score: score for 'N' matching an 'N'
"""
cdef np.ndarray[DTYPE_INT, ndim=2] a
cdef np.ndarray[DTYPE_LONG, ndim=2] a
cdef size_t ai = 0, i
cdef int v, mat_size

Expand All @@ -82,7 +80,7 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s

nuc_ords = [ord(x) for x in ['A','T','C','G']]

a = np.zeros((mat_size, mat_size), dtype=int)
a = np.zeros((mat_size, mat_size), dtype=long)

for nuc in nuc_ords:
for nuc2 in nuc_ords:
Expand All @@ -102,8 +100,8 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s

@cython.boundscheck(False)
@cython.nonecheck(False)
def global_align(str pystr_seqj, str pystr_seqi, np.ndarray[DTYPE_INT, ndim=2] matrix,
np.ndarray[DTYPE_INT,ndim=1] gap_incentive, int gap_open=-1,
def global_align(str pystr_seqj, str pystr_seqi, np.ndarray[DTYPE_LONG, ndim=2] matrix,
np.ndarray[DTYPE_LONG,ndim=1] gap_incentive, int gap_open=-1,
int gap_extend=-1):
"""
Global sequence alignment (needleman-wunsch) on seq i and j.
Expand Down
16 changes: 10 additions & 6 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -819,9 +819,9 @@ def process_bam(bam_filename, bam_chr_loc, output_bam, variantCache, ref_names,
crispresso_cmd_to_write = ' '.join(sys.argv)
sam_out.write('@PG\tID:crispresso2\tPN:crispresso2\tVN:'+CRISPRessoShared.__version__+'\tCL:"'+crispresso_cmd_to_write+'"\n')
if bam_chr_loc != "":
proc = sb.Popen(['samtools', 'view', bam_filename, bam_chr_loc], stdout=sb.PIPE, encoding='utf-8')
proc = sb.Popen(['samtools', 'view', '-F', args.samtools_exclude_flags, bam_filename, bam_chr_loc], stdout=sb.PIPE, encoding='utf-8')
else:
proc = sb.Popen(['samtools', 'view', bam_filename], stdout=sb.PIPE, encoding='utf-8')
proc = sb.Popen(['samtools', 'view', '-F', args.samtools_exclude_flags, bam_filename], stdout=sb.PIPE, encoding='utf-8')
num_reads = 0

# Reading through the bam file and enriching variantCache as a dictionary with the following:
Expand Down Expand Up @@ -2335,7 +2335,7 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited

#subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded)
this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs'])))
this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
this_include_idxs = sorted(map(int, set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))

refs[ref_name]['gap_incentive'] = this_gap_incentive
refs[ref_name]['sgRNA_cut_points'] = this_cut_points
Expand All @@ -2360,8 +2360,8 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
)

#subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded)
this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs'])))
this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
this_exclude_idxs = sorted(map(int, set(refs[ref_name]['exclude_idxs'])))
this_include_idxs = sorted(map(int, set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
refs[ref_name]['include_idxs'] = this_include_idxs
refs[ref_name]['exclude_idxs'] = this_exclude_idxs

Expand Down Expand Up @@ -3361,11 +3361,15 @@ def calculate_99_max(d):
ref_info_file.write(refString)
np.set_printoptions(linewidth=1000**1000) #no line breaks
for ref_name in ref_names:
if isinstance(refs[ref_name]['include_idxs'], np.ndarray):
refs[ref_name]['include_idxs'] = refs[ref_name]['include_idxs'].tolist()
if isinstance(refs[ref_name]['exclude_idxs'], np.ndarray):
refs[ref_name]['exclude_idxs'] = refs[ref_name]['exclude_idxs'].tolist()
refString = ( refs[ref_name]['name'] + "\t" +
str(refs[ref_name]['sequence']) + "\t" +
str(refs[ref_name]['sequence_length']) + "\t" +
str(refs[ref_name]['min_aln_score']) + "\t" +
str(refs[ref_name]['gap_incentive']) + "\t" +
str(refs[ref_name]['gap_incentive'].tolist()) + "\t" +
str(refs[ref_name]['sgRNA_cut_points']) + "\t" +
str(refs[ref_name]['sgRNA_plot_cut_points']) + "\t" +
str(refs[ref_name]['sgRNA_intervals']) + "\t" +
Expand Down
Loading

0 comments on commit 7dbd5eb

Please sign in to comment.