Sam/pyproject setup (#104) (pinellolab#496)

Add the pyproject.toml file and make the Cython files compatible with NumPy 2.

Co-authored-by: Samuel Nichols <Snic9004@gmail.com>
edilytics · Nov 8, 2024 · 0c82b34 · 0c82b34
1 parent 10be3a4
commit 0c82b34
Show file tree

Hide file tree

Showing 7 changed files with 4,555 additions and 3,545 deletions.
diff --git a/CRISPResso2/CRISPResso2Align.c b/CRISPResso2/CRISPResso2Align.c
diff --git a/CRISPResso2/CRISPResso2Align.pyx b/CRISPResso2/CRISPResso2Align.pyx
@@ -17,9 +17,7 @@ cdef extern from "stdlib.h":
 cdef extern from "Python.h":
     ctypedef void PyObject
 
-ctypedef np.int_t DTYPE_INT
-ctypedef np.uint_t DTYPE_UINT
-ctypedef np.int8_t DTYPE_BOOL
+ctypedef long DTYPE_LONG
 
 cdef size_t UP = 1, LEFT = 2, DIAG = 3, NONE = 4
 cdef size_t MARRAY = 1, IARRAY = 2, JARRAY = 3
@@ -38,7 +36,7 @@ def read_matrix(path):
     The score for a 'C' changing to an 'A' is stored in the matrix as:
         mat[ord('C'), ord('A')] = score
     """
-    cdef np.ndarray[DTYPE_INT, ndim=2] a
+    cdef np.ndarray[DTYPE_LONG, ndim=2] a
     cdef size_t ai = 0, i
     cdef int v, mat_size
 
@@ -50,7 +48,7 @@ def read_matrix(path):
             headers = [ord(x) for x in line.split(' ') if x]
         mat_size = max(headers) + 1
 
-        a = np.zeros((mat_size, mat_size), dtype=int)
+        a = np.zeros((mat_size, mat_size), dtype=long)
 
         line = fh.readline()
         while line:
@@ -72,7 +70,7 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s
     n_mismatch_score: score for matching a nucleotide with 'N'
     n_match_score: score for 'N' matching an 'N'
     """
-    cdef np.ndarray[DTYPE_INT, ndim=2] a
+    cdef np.ndarray[DTYPE_LONG, ndim=2] a
     cdef size_t ai = 0, i
     cdef int v, mat_size
 
@@ -82,7 +80,7 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s
 
     nuc_ords = [ord(x) for x in ['A','T','C','G']]
 
-    a = np.zeros((mat_size, mat_size), dtype=int)
+    a = np.zeros((mat_size, mat_size), dtype=long)
 
     for nuc in nuc_ords:
       for nuc2 in nuc_ords:
@@ -102,8 +100,8 @@ def make_matrix(match_score=5, mismatch_score=-4, n_mismatch_score=-2, n_match_s
 
 @cython.boundscheck(False)
 @cython.nonecheck(False)
-def global_align(str pystr_seqj, str pystr_seqi, np.ndarray[DTYPE_INT, ndim=2] matrix,
-          np.ndarray[DTYPE_INT,ndim=1] gap_incentive, int gap_open=-1,
+def global_align(str pystr_seqj, str pystr_seqi, np.ndarray[DTYPE_LONG, ndim=2] matrix,
+          np.ndarray[DTYPE_LONG,ndim=1] gap_incentive, int gap_open=-1,
           int gap_extend=-1):
     """
     Global sequence alignment (needleman-wunsch) on seq i and j.

diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
@@ -2335,7 +2335,7 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
 
                     #subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded)
                     this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs'])))
-                    this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
+                    this_include_idxs = sorted(map(int, set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
 
                     refs[ref_name]['gap_incentive'] = this_gap_incentive
                     refs[ref_name]['sgRNA_cut_points'] = this_cut_points
@@ -2360,8 +2360,8 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
                         )
 
                     #subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded)
-                    this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs'])))
-                    this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
+                    this_exclude_idxs = sorted(map(int, set(refs[ref_name]['exclude_idxs'])))
+                    this_include_idxs = sorted(map(int, set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
                     refs[ref_name]['include_idxs'] = this_include_idxs
                     refs[ref_name]['exclude_idxs'] = this_exclude_idxs
 
@@ -3361,11 +3361,15 @@ def calculate_99_max(d):
             ref_info_file.write(refString)
             np.set_printoptions(linewidth=1000**1000) #no line breaks
             for ref_name in ref_names:
+                if isinstance(refs[ref_name]['include_idxs'], np.ndarray):
+                    refs[ref_name]['include_idxs'] = refs[ref_name]['include_idxs'].tolist()
+                if isinstance(refs[ref_name]['exclude_idxs'], np.ndarray):
+                    refs[ref_name]['exclude_idxs'] = refs[ref_name]['exclude_idxs'].tolist()
                 refString = ( refs[ref_name]['name'] + "\t" +
                     str(refs[ref_name]['sequence']) + "\t" +
                     str(refs[ref_name]['sequence_length']) + "\t" +
                     str(refs[ref_name]['min_aln_score']) + "\t" +
-                    str(refs[ref_name]['gap_incentive']) + "\t" +
+                    str(refs[ref_name]['gap_incentive'].tolist()) + "\t" +
                     str(refs[ref_name]['sgRNA_cut_points']) + "\t" +
                     str(refs[ref_name]['sgRNA_plot_cut_points']) + "\t" +
                     str(refs[ref_name]['sgRNA_intervals']) + "\t" +