Skip to content

Commit c1f78ee

Browse files
authoredNov 2, 2024
also handle iupac codes in ReverseComplement()
1 parent 8952dcf commit c1f78ee

File tree

2 files changed

+23
-13
lines changed

2 files changed

+23
-13
lines changed
 

‎trtools/utils/tests/test_utils.py

+12
Original file line numberDiff line numberDiff line change
@@ -128,13 +128,25 @@ def test_GetCanonicalOneStrand():
128128
assert(utils.GetCanonicalOneStrand("TTGTT")=="GTTTT")
129129
assert(utils.GetCanonicalOneStrand("")=="")
130130
assert(utils.GetCanonicalOneStrand("at")=="AT")
131+
# Additional tests with IUPAC codes
132+
assert(utils.GetCanonicalOneStrand("RY")=="RY")
133+
assert(utils.GetCanonicalOneStrand("YR")=="RY")
134+
assert(utils.GetCanonicalOneStrand("SW")=="SW")
135+
assert(utils.GetCanonicalOneStrand("WS")=="SW")
136+
assert(utils.GetCanonicalOneStrand("KM")=="KM")
137+
assert(utils.GetCanonicalOneStrand("MK")=="KM")
131138

132139
# ReverseComplement
133140
def test_ReverseComplement():
134141
assert(utils.ReverseComplement("CGAT")=="ATCG")
135142
assert(utils.ReverseComplement("")=="")
136143
assert(utils.ReverseComplement("CGNT")=="ANCG")
137144
assert(utils.ReverseComplement("ccga")=="TCGG")
145+
# additional tests with IUPAC codes
146+
assert(utils.ReverseComplement("RYASWKM")=="KMWSTRY")
147+
# also test the codes that swap in pairs (B<->V, D<->H); "BDHV" is its own reverse complement
148+
assert(utils.ReverseComplement("BDHV")=="BDHV")
149+
assert(utils.ReverseComplement("N")=="N")
138150

139151
# InferRepeatSequence
140152
def test_InferRepeatSequence():

‎trtools/utils/utils.py

+11-13
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ def GetCanonicalOneStrand(repseq):
428428
def ReverseComplement(seq):
429429
r"""Get reverse complement of a sequence.
430430
431-
Converts everything to uppsercase.
431+
Converts everything to uppercase and handles IUPAC codes.
432432
433433
Parameters
434434
----------
@@ -444,21 +444,19 @@ def ReverseComplement(seq):
444444
--------
445445
>>> ReverseComplement("AGGCT")
446446
'AGCCT'
447+
>>> ReverseComplement("AGGCTRY")
448+
'RYAGCCT'
447449
"""
450+
iupac_complement = {
451+
'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A',
452+
'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W',
453+
'K': 'M', 'M': 'K', 'B': 'V', 'D': 'H',
454+
'H': 'D', 'V': 'B', 'N': 'N'
455+
}
448456
seq = seq.upper()
449457
newseq = ""
450-
size = len(seq)
451-
for i in range(len(seq)):
452-
char = seq[len(seq)-i-1]
453-
if char == "A":
454-
newseq += "T"
455-
elif char == "G":
456-
newseq += "C"
457-
elif char == "C":
458-
newseq += "G"
459-
elif char == "T":
460-
newseq += "A"
461-
else: newseq += "N"
458+
for char in reversed(seq):
459+
newseq += iupac_complement.get(char, 'N')
462460
return newseq
463461

464462
def InferRepeatSequence(seq, period):

0 commit comments

Comments
 (0)