-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmer.py
38 lines (30 loc) · 8.83 KB
/
kmer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Module-level output handle. Mode "w" creates output.txt, or truncates it if
# it already exists, as soon as this statement runs.
f = open("output.txt", "w")
def count_ks(string, sub_string):
    """Count the occurrences of ``sub_string`` in ``string``, overlaps included.

    Every start offset at which a full-length slice fits is compared against
    ``sub_string``; each match adds one to the total.

    Args:
        string: text to scan.
        sub_string: pattern to look for.

    Returns:
        The number of (possibly overlapping) positions where the pattern
        matches. An empty pattern matches at every offset, end included.
    """
    width = len(sub_string)
    last_start = len(string) - width
    return sum(
        1
        for start in range(last_start + 1)
        if string[start:start + width] == sub_string
    )
def k_mer(s, k):
    """Return the most frequent repeated k-mers of ``s`` and their count.

    Every length-``k`` slice of ``s`` (overlapping slices included) is tallied
    in a single pass, instead of re-scanning the whole string once per
    position.

    Args:
        s: sequence to scan.
        k: length of the substrings to tally; expected to be non-negative.

    Returns:
        A ``(kmers, count)`` tuple. ``kmers`` lists the distinct k-mers that
        share the highest occurrence count, in order of first appearance in
        ``s``; ``count`` is that occurrence count. When no k-mer occurs more
        than once (including when ``s`` is shorter than ``k``) the result is
        ``([], 0)``.
    """
    from collections import Counter

    tallies = Counter(s[i:i + k] for i in range(len(s) - k + 1))
    best = max(tallies.values(), default=0)
    # A k-mer that appears only once is not considered "frequent".
    if best <= 1:
        return [], 0
    # Counter keeps insertion order (Python 3.7+), so the winners come out in
    # order of first appearance.
    return [kmer for kmer, seen in tallies.items() if seen == best], best
def patch(s, k, L, t):
    """Find the k-mers that form an (L, t)-clump in ``s``.

    A k-mer forms an (L, t)-clump when some length-``L`` window of ``s``
    contains it at least ``t`` times (overlapping occurrences count).

    The window is slid one character at a time while a running tally of the
    k-mers inside it is maintained: one k-mer leaves, one enters. Only the
    entering k-mer's count can rise, so it is the only one that needs to be
    checked against ``t`` at each step.

    Args:
        s: sequence to scan.
        k: k-mer length.
        L: window length.
        t: minimum number of occurrences inside one window.

    Returns:
        The distinct qualifying k-mers, ordered by the position at which each
        first reaches ``t`` occurrences inside a window. The list is empty
        when ``k`` is not positive, when ``L`` is shorter than ``k``, or when
        ``s`` is shorter than ``L``. For ``t <= 1`` every k-mer that occurs
        in any window qualifies.
    """
    from collections import Counter

    kmers_per_window = L - k + 1
    if k <= 0 or kmers_per_window <= 0 or len(s) < L:
        return []

    counts = Counter()
    clumps = []
    reported = set()
    for pos in range(len(s) - k + 1):
        # Once the window is full, drop the k-mer that slides out on the left
        # before counting the one that enters on the right.
        if pos >= kmers_per_window:
            gone = pos - kmers_per_window
            counts[s[gone:gone + k]] -= 1
        kmer = s[pos:pos + k]
        counts[kmer] += 1
        if counts[kmer] >= t and kmer not in reported:
            reported.add(kmer)
            clumps.append(kmer)
    return clumps
# Input sequence. The literal is written as adjacent string pieces, which
# Python joins into a single string; a quoted literal cannot itself span
# several physical lines.
GENOME = (
    "AGACAACGAAGTTAGTTAGATCCGCGGCGCACCCCAGCAGTATTGGTATCGTATAGCAATAGACGGCCCAACGCGCTGGTGCGCAAAATTGACAAAGTCGACACGAGGTCGCTATCTGCCAATCAGCAGGGTTGCCCGAGAGCCTCCAACCGAGCCAACACCGGCAACATTTGGGCGAGCGGGGGGGCATTCACTGTAAAGATGTCCGGCCGCGTGACGAAGGGGACATGGGATCCGAACAATTACGCATATGAAAGGAACCTGGCAAAGGCTGCGCGCCTATCCTAATCTCTTAGACAGCGTAGTCGAAACTGGACTGGCCCAACTACAAATGAGAAACCGACAGGTTACATAGGGTAATCAACCCCCTGATATTGAAAGCCACGAGTCATAAAGTGGCTTTTTGATAAGTGAGTTTAGTGTCTTTAGTCGTTATTGCTTAACGCCAACCGCAGACATAAGTTCGCGGAATGCCCTTTTCTATAGTTGAAAAAGGGTCACCCACATTTAGGGGGTTCGATCATGGGGTCAGTACCACCATTGGAGTAGGGTCAGTACCGTACGGGTCAGTACCGCGAGGGGTCAGTACCTGCCAGCAAGTTTCTTGCCTACTACCCCAGCAGGACAGCGGGTCAGTACCCCTTTGACAGGGGGGGTCAGTACCATATGGGTCAGTACCTAGGGTCAGTACCACGGGTCAGTACCGAAATTAGACTCACGAGCGGGTCAGTACCTTGGGTCAGTACCGATACTCCTGGGCCGGAGAAGGTGCCCAGTTCGGGTCAGTAGGGTCGGGTCAGTACCGGATGACGTAGCGTGGGGTCAGTACCGACCAGCCAGCGTCGTCCGTTATAGGAACTATTCAATGGGTCAGTACCATGCTTCTAAAACTAGATCTAAGTCGACCTCCTTAACTTATGGGTCAGTACCACCAGGACGGGCTTTCGGCTCGGGTCAGTACCTCGGGGGTCAGTACCTTCGGATGAGCGAGACGCTAGGACATGGGTCAGTACCATATAGAGTTAACAATCCTACAGGGGTCAGTACCTACCGATTTATCGCAAGCAGGGGGGTCAGTACCACTTACTTGTATCGTAGAGACATAGGGAGCCAGATTTTGTTTTACTCGCAGGGGTTTTAAAGCTCCGATGTGTCAAGAACTCCGTGGTAGGCCATACTCTGTTCTGAGACCCTATCTGTAAGGCGGGGCCTGAAGTTACTGGCCACTTTAGTGAGTAAAATTGACCGCTCGAGCCACTGGCCGACCTTCGATCCGCTTAGATCATCATATGGGAGAGAGATGTTTATAGATAGTGATCTCTGTGTTAATACAAGGAATCCAGCAGGCCCGCCTTCTGACACCAAAGCAGCAATCACGAAGCCGCTTCGATCCGGACACCTTCTTATGCCATGCCCGCTAATTTGACTCTCTCCTAACCAGTTCTATGCGCCAAGATGCCATTAAACATTGGAGTGTTCAACACGCACAGAGTATTAGACTCCGTGTTGGTCACATAGTGGCTATTATGGTCCAGTTGGCGGGCACTGCAGTCCGTCCAGTTGGCGTGTCCAGTTGGCGAATTGGTCCAGTTGGCGATGTTTGTTGCTCTAGATGTCCAGTTGGCGAATCCTAGAGAGTGTATTTATTGACGGTGTGAACACCTTCTTCCAGCGTGCCAGCGTCCAGTTGGCGTGGCGGAAACCGGCTGTCCAGTTGGCGGTAGAATAAGTTCGCGGCTAGTCCAGTTGGCGTCCAGTTGGCGGGGCGACGTCGTCCAGTTGGCGGGTCCAGTTGGCGTTGGGTCCAGTTGTCCAGTTGGCGAGATCAAGGACATAGGCAGATATATGTCCAGTTGGCGTTGATGTAGCTAAGTACTAGGGGACTCCGAAAGTCGGGTCCAGTTGGCGGGTCCAGTTGGCGGATCTGCAACCAAAGTAAACGTCCAGTTGGCGACTGGACAAACCAGATATGCTAACAGTCCGG"
    "CCCTGGGTGGTCCAGTTGGCGGAAGTCCAGTTGGCGGGTTGTCAGGTCACGGTGGTCCCCCCCGTATGGTCCAGTTGGCGTTCAACCGTCCAGTTGGCGACAAATCAGCTCTTACGGTCCAGTTGGCGTGGTTCCGAGGACAAACGTATCCTGTACACCCGAATTACAAGTCGCGCAACGTCACCTGGGTTGTTTGACGGACGCTGATATTGGCCGAGAACAATTAATGGCAGTGACCGCCCAGTACGGAGGGTGACGTGCTCGTACTAATTTGAGAGATGCGCTTTCCGGGGGTGAGGATCACGACCCTGTTTGAGGTATTATACTAAGGCTCAGAACTGCGCAAATGCCGCTCGGATGTTTTACAAGCCGCGCATTCCCCAGCTTTCTTCACTAGATTAAGCTGTTCTCGAATAGTCTTTACCCTTACTTGTTCGCGTACGCGACGGACTGCTTCTACAACTAATCTTAACACCAGCATTCCACAGCGTCTGCACAGACACCCAATGAACAATGCGCATTTTAAGACTGAAGCTGCGCCTATCGCGCTCCTTCCGTCGTAGACGACCACATCACTTCCCCGTTAACGATTTAGATTGAACATTTCTGCATCCCTAGGCACGATGCCAGTTATTGATGAGTGGCGAGTAGTAAGTTTTAAAAACGATATCCCACTAGGGGCAGGGCCCGCTAACTGTACCGTACATTATTTGCCGGTGCCGATTCTATGACTAAGGAAGAGGGTGATCGTTAAGCCTTGTCCAGTAAGTGAAAGGACGACTCCCCTCCCCAACCTACATCCTCCCCAAGAGGGTGGGTGCGCTCTACAGGACACCTCCCCAAGCTATAACTCTATGAATTCCTCCCCAACCAAAATTTTCCTCCCCCTCCCCAACCCCAAACTCTATGAATTGTCCTCCCCAAGGCGGAAACCTATCTGCCCTCCCCAACCAATTTTTTCGTCGAAGGTCAAAGCATCCCTTTATTTTACTCTATGAATTGCGCTAGCACCCTCCCCTCCCCAACTCCCCAGATCTTTAGAATCAGATCTTTAGCTCCCCAGATCTTTAGCTCCCCAATAAACTCAGATCTTTAGCCAATTCCAGCAGATCTTTAGTCTTTAGCCCAAATGGCAGCCCTCCCCAAAAGAGCCCAACGACTCTATGAATTATCAGCAGACAGATCTTTAGCTGACTTAAGTTGCTGGAACTCTCAGATCTTTAGGCAGATCTTTAGTACTGGCACGCACGGAACTCCCACGGAACTACGGAACTAAATTAGACCACGGAACTTTGTAAGTCACGGAACTGGACTTAAGTTCACGGAACTACTAGGACTTACACGGAACTTCAGATCTTTAGATTCTGTACACACGGCACGGAACTGTACCGCGCAACAACCTCCTCCGACAACCTCCTCGGCCCCACGGAACTGACTTACACGGAACTGACTTAAGTTGATCTTTACCACGGAACACGGAACTGTCTTTAGCACGGAACAACCTCCTCGTGACTTAAGTTGAAAGCCACAACCTCCTCCTCCGGACACAACCTCCTCCACAACCTCCTCACGCACGGAACTAACAACACAACCTCCTCTCCGGTAACGAGGGACTTAACACGGAACTCACAACCTCCTCCTGAACTCACGGAACAACCTCCTCGACAAAGCACGACAACCTCCTCTCCTCACAAACAACCTCCTCAACTGTGGGCAAGACTGGTTATCCTACTCCGGAACAACCTCCTACAACCTCCTCACCTCCTCCGATTCTTTCGGCATTTAAGACAACCTCCTCAACCTCCACAACCTCCTCCAACCTCCTCAATCCAAACTCGGATGAGAAAGTGACAACCTCCTCTTCTACAACCTCCTCTCGATTCTTTCTACAACCTCCTCCTCACAACCTCCTCCGATTCTTTCTCGATTCTTTCTTCTTTCGCTTTAGAGTGGCCCGGGTGGACGCTTTCGGGCCTCTCGATTCTTTCGCTCGATTCTTT"
    "CCTCAGGAGTCTAACCTCCTCGATTCTTTCTCGATTCTTTCTTCTATGAAAGCAAGAACGATATTTTCTCCAGAACGATATCGATATTGCTCGATTCTTTCGAAGTCTTTGATTAAGCACAAGTCGAATCTCGATTAGAACGATATGAACGATAGAACGATATATTCTACCCCCCGGATCAGAACGATATTTCTCTTTCCGTTCTCGATTCCTCGATTCTTTCTCGAGAACAGAACGATATATCTCGATTCTTTCAGAACGATATATATCAATTATCAAACGTCCGAGAACGATATGTATCTCCAGAACGATATACGCCACATCATTCAGAGAACGATATACTAGAACGATATTTATAAGAACGATATGATATTACGGATACAGAACGATATAGAACGATATTAGCGCGGGCTCCCGTCAGAACGATATCCAAGATTAAAGTTTTCAAGAACGATATACAGAACGATATTAGAACGATATGATATGCGTCGACATCTTAGAACGATATAAGCGTCCCTTCTTGCGATTGTGTCTAGTGGCGCGTCTACCTTAATCCTGTCAGTAACCTCGATCTAGCATCGAAGTGAACATCCACGGTCCTGTTGTTGGCATGGAGATTGGTATTGGCTATATTGTCGTTATTCGGCACCCTGCATTCATGCCCAGGATACCGAACTCCCCCCTGCCGAAGCTACGCCGCTCGGTCGCGCCTGACCGGTGTAAACAAGCTACGCGCGATCGTATTGGCTAGCATAAGCAGTCTGGGCTATGGCCCAATTTTCGCATAGTACGTAAGGGACTTAAGGTATTTGTCCAGGGCCCTCACCCCTGCAATCGCGGAGGACGTGCCATCGTGGCCCCTCTCGTCTTAGACACGGTATAAGCCCCGCACAGGCACATTAATGTCTTCGACTGAACGTGTTATGTGAGCTTAATACGAGACTGCACTTTAGTCTCGGAGTGGCTTGGGACCGATGTTGTTATTCGCTTTTAAACTGTTGGCCACCTGCGTCATACTGGGAGAATCTCGGGCACTGGTTGACCAGATCGGGTGGGTTTTCATTACTCGCTTGAACCCGATTCATTACTCCATGAGTGCTGCAGTCCATTATCCACCGTGCGTGAAAGTAAGTCTCACCGGTTCGAGTCATCCCAAGCGTCACGGCTCCTAACGTCTGTTTGTCAAAATACTACTTATGTTCTTGACAATCATTGTGCTTCCAGTAGTAACCCAACGAACGCTGGTGTAACCCAACGGCCCTCACTCGGTAGTAACCCAACGGGTAACCCAACGCAACGCTCGATGAACGAATGTGGGGTAACCCAACGGCTGGGCCGTGGCGCTCTCAAAGTGAACGGATGTAACCCAACGAAGTAACCCAGTAACCCAACGATCCGGGGTTAGTGTAACCCAACGATCCCCTGCCGATATTAAGTCGCTCGCTTACGTGCTGGCGTAACCCAACGAGTGGTGAAATAATTAATCAACGAGAGCAAGGGTAACCCAACGGGTAACCGTAACCCAACGTAACCCAACGACGTAACCCAGTAACCCAACGCCAAATACCTTATGTATGGGGTAACCCAACGAGTAACCCAACGTACCGGTAACCCAACGCAACGCAGAAAAGCTAGTATTCCGAAAATCGTACCGGAGTATGTAACCCAACGAGAAGGTCGGCCTTGGTTTGAGGATCCTGCGATGAACTCACACTTTTCGGGGCAATCCGACGAGTGCCGGGTAACCCGTAACCCAACGCAAGTTAAGGGCAACAGCATCAAGGTAACCCAACGTCGCATCTGACACGTCCCATTCCGCGGTGAGATTGGCACAGCTCTTAGCCGGCTCTAAGGGCAAGCCTAGTGTTGCATTAGTTTGTGGTACTAATCGCATAGCTTGCGCGTCCAGCCTCTCCTCTTCATGATCTCCTATACCCGTGACGGACGCCGCGCGAATCATTTGTGTTAGTTTGTTATCGATATGACAGAAGGCACTAAAA"
    "ACAGGTCGAAGAGGTCCAGATTTAGCCATGTACTTAGGCGCTATACGTTTCCGAAGACGTAAAGAACTCGAGCTGTGTTTCCCACGTTCTTCACCATAAACTCGAAGCTGCGCATGAACCCAAGTGTAACTAGCTTCGTTCATCGCCCTATCGCCGTTGCTATAGGGGTCGAATAGCTCCACATTGAACCGCCCGAAATGGTCCCCTCTTGTAGAGCTTATGGGTCTCACTAAATTAAACACAATCCCTAAGAATAAAATGTCCGATGAGAGCTCCATAAGCTTTGGACGATCAAAAGGACTTGGACGGACGTCGGGATGATAATAACACTCGCCACCTACTCTTGACATTTCTCCACACAGGATTAGGGGACGAGGAGTATACAGTAATACAACAATTAGTCGCCGGAGCTAGCACGAATAGGAAAGTTTCTAAAGTGTGGAGTGCCGGGCACTCTTCGCGACGCCATATTACCCCCTTTTCCGACCCTTGCACTATGCAGAACCTCAAGTATAGAACGAAGGTATAGCTCATATAAGAATCCTGTATCTCGCAGACAGAGAGGAGACGGCACACTTCAAGACCGTCTCGCAGACAGGCACGAAACCAATGTCTTGCAAATCAATTACATATGCTTCCACAAAAGTGCTCTCGCAGACAGCCATAATTGATTAACTTCTCGCAGACTCTCGCAGACAGCAGACAGCGAGGAATCTCGCAGACAGCAGGCGATGGTAAGGGCATACTCTCTCTCGCTCTCGCAGACAGGGCAGATCGCAGGCTAGCATTTGGTCTCGCAGACAGCTTTTCTCGCAGACAGGTCCCTTCTCGCAGACAGGGATTTATCATCACGGTGGACGATGGTCTCGCAGACTCTCGCAGACAGCTAGTGGTGATCGCTATCTCGCAGACAGCTCTCTCGCAGACAGGTCTCTAGTACAGATATCTCGCAGACACTAGTACAGATATCTCGCAGACAGTCGCAGACAGCTAGTACAGATCTAAATGGGATCATACTCATGTCTAGTACAGATCATTATAGTTCTCGCAGACAGAGTTAGCACATTCTCGCAGACAGCTCTCGCAGACAGGAGTACGGGCACTAGTACAGATTCTCTAGTACAGATGAACTACGCCTAGTACAGACTACTAGTACAGATAGTACAGATCCTAGTACAGATGAACTAGTACAGATCTATGCAGCGCCACACAATTGCTAGTACAGATTCCGTTGTATATTAGGGTCATTGCTCGCTCAAAGGAAGTGTATTGTTCCCGTTTCTATCCGCGCCTAGTACAGATGATCTGTTAGCGCCCGTAGTACCAGAAACTGGCCCAGAGGGGATCTCGAAAAGCCTCTACTAGTACAGATCTAGCTACTAGTACAGATACAGATCATCTGGTCAACCTTCTCCTAGTACAGATTGTTCTCCTAGTCTAGTACAGATGCACTAGTACAGATAGCTAGTACAGATAGACAAGAGCAACTAGTACAGATAGCTAGTACAGATAGTTTACCACCTAATGGGAACGTTGGGAAATCTGCGTATCGTAGGAGTGGAAGGCATGGGGGTCCTTCCCTATGTGCCTCACCACCTAATGGGAGCTCAGAATCACGGGTAAGCGGGCAGGCGAGATCGGACGGATACCACTGAGTCTTGGGGCTTGCCATTCAAAAAACCTAATGGGAATGGGTTCGACCTAATGGGCTACCTAATGGGGGAATGGGAATACCTAATGGGATGGGGCGGACCTAATGGGTGACCTAATGGGACCTAATGGGAACCTAATGGGTAATGGGACCTAATGGGACCTAATGGGACCTAATGGGACCTAATGGGACCTAATGGGAACCTAATGGGTAATGGGACCTAATGGGGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATC"
    "GAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACTGATCGAACT"
)

# Write each k-mer returned by patch() followed by a single space, and close
# the module-level output handle even if the search raises.
try:
    for kmer in patch(GENOME, 12, 525, 16):
        f.write(kmer + " ")
finally:
    f.close()