-
Notifications
You must be signed in to change notification settings - Fork 1
/
bio.py
108 lines (93 loc) · 4.69 KB
/
bio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/python3
#
# bio.py
# Biopython plugin for Caterpillar Proxy
#
# Euiseo Cha (Wonkwang University) <zeroday0619_dev@outlook.com>
# https://github.com/gnh1201/caterpillar
# Created at: 2024-07-02
# Updated at: 2024-07-02
#
from socket import socket
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction
from base import Extension
def _analyze_sequence(sequence: str) -> dict[str, str]:
    """
    Run a fixed set of Biopython ``Seq`` conversions on a nucleotide string.

    The input is wrapped in a single ``Seq`` object; every conversion below is
    called on that same object and its result is converted to ``str``.

    :param sequence: Nucleotide sequence (string) to be analyzed.
    :return: Dictionary with the following keys, in this order:
        - complement: result of ``Seq.complement()``.
        - complement_rna: result of ``Seq.complement_rna()``.
        - reverse_complement: result of ``Seq.reverse_complement()``.
        - reverse_complement_rna: result of ``Seq.reverse_complement_rna()``.
        - transcription: result of ``Seq.transcribe()``.
        - translation: result of ``Seq.translate()``, called on the input
          sequence itself (not on the transcription result).
        - back_transcribe: result of ``Seq.back_transcribe()``, called on the
          input sequence itself.
    """
    seq = Seq(sequence)
    # Keys are listed in the same order the original mapping produced them.
    return {
        "complement": str(seq.complement()),
        "complement_rna": str(seq.complement_rna()),
        "reverse_complement": str(seq.reverse_complement()),
        "reverse_complement_rna": str(seq.reverse_complement_rna()),
        "transcription": str(seq.transcribe()),
        "translation": str(seq.translate()),
        "back_transcribe": str(seq.back_transcribe()),
    }
def _gc_content_calculation(sequence: str) -> dict[str, float]:
    """
    Calculate the GC content of a given sequence.

    The value is whatever Biopython's ``gc_fraction`` returns for the input;
    it is passed through unchanged (not stringified), so the mapping's value
    type is numeric rather than ``str``.

    :param sequence: DNA sequence (string) for which to calculate the GC content.
    :return: Dictionary with a single key, ``gc_content``, holding the
        result of ``gc_fraction(sequence)``.
    """
    return {"gc_content": gc_fraction(sequence)}
class PyBio(Extension):
    """
    Extension that exposes Biopython sequence helpers as callable methods.

    NOTE(review): how ``type``, ``method`` and ``exported_methods`` are
    consumed is defined by the ``Extension`` base class / host framework,
    which is not visible here — confirm against ``base.py``.
    """

    def __init__(self):
        # Registration metadata read by the hosting framework (presumably).
        self.type = "rpcmethod"
        self.method = "analyze_sequence_init"
        # Names of the methods on this class that are exported.
        self.exported_methods = ["analyze_sequence", "gc_content_calculation"]

    def dispatch(self, type, id, params, conn):
        """
        Send a fixed greeting over the connection.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Not used in this function.
        :param conn: Object with a ``send`` method; receives the greeting bytes.
        """
        conn.send(b"Greeting! dispatch")

    def analyze_sequence(self, type, id, params, conn: socket):
        """
        Analyze the sequence found under ``params["sequence"]``.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Dictionary containing the sequence with the key "sequence".
                       Example: {"sequence": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT"}
        :param conn: Not used in this function.
        :return: The dictionary produced by ``_analyze_sequence`` with the keys
                 complement, complement_rna, reverse_complement,
                 reverse_complement_rna, transcription, translation and
                 back_transcribe.
                 Example: {"back_transcribe": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT",
                           "complement": "TACGCATGCATCGATCGATCGCATCGATCGACTGA",
                           "complement_rna": "UACGCAUGCAUCGAUCGAUCGCAUCGAUCGACUGA",
                           "reverse_complement": "AGTCAGCTAGCTACGCTAGCTAGCTACGTACGCAT",
                           "reverse_complement_rna": "AGUCAGCUAGCUACGCUAGCUAGCUACGUACGCAU",
                           "transcription": "AUGCGUACGUAGCUAGCUAGCGUAGCUAGCUGACU",
                           "translation": "MRT*LASVAS*"}
        :raises KeyError: If ``params`` has no "sequence" key.
        """
        return _analyze_sequence(params["sequence"])

    def gc_content_calculation(self, type, id, params, conn: socket):
        """
        Compute the GC content of the sequence found under ``params["sequence"]``.

        :param type: Not used in this function.
        :param id: Not used in this function.
        :param params: Dictionary containing the sequence with the key "sequence".
                       Example: {"sequence": "ATGCGTACGTAGCTAGCTAGCGTAGCTAGCTGACT"}
        :param conn: Not used in this function.
        :return: The dictionary produced by ``_gc_content_calculation``, with
                 the single key ``gc_content``.
                 Example: {"gc_content": 0.5142857142857142}
        :raises KeyError: If ``params`` has no "sequence" key.
        """
        return _gc_content_calculation(params["sequence"])