-- motifs.sql
-- Creates the motifs.protein_motifs table and loads its rows.
-- Table of protein sequence motifs: one row per pattern.
-- Every text column is capped at 255 characters.
create table if not exists motifs.protein_motifs (
    ID      int          not null auto_increment,  -- surrogate key, assigned by the server
    motif   varchar(255) not null,                 -- motif name / category label
    subtype varchar(255),                          -- optional, nullable
    prosite varchar(255) not null,                 -- pattern text; presumably PROSITE-style notation (TODO confirm)
    regex   varchar(255) not null,                 -- regular-expression form of the pattern
    notes   varchar(255),                          -- optional free-text remarks, nullable
    primary key (ID)
);
-- Seed rows for motifs.protein_motifs.
--
-- Column conventions used by the rows below:
--   motif   : top-level category; rows sharing a category must spell it
--             identically (same hyphen character) so grouping/equality works.
--   subtype : specific enzyme/domain, or the string "none" as a placeholder.
--   prosite : PROSITE-like pattern text. x = any residue, x(n) = unspecified
--             run, Hpo = hydrophobic residue, Pol = hydrophilic residue.
--   regex   : regular-expression translation of the pattern:
--             x -> .      x(a,b) -> .{a,b}      x(n) / "..." -> .+
--             Hpo -> [AVILMFGWP]      Pol -> [STHNQEDKR]
--             ^ / $ anchor N-terminal / C-terminal motifs.
--             Repeat counts are written OUTSIDE the character class
--             ([RK]{1,3}); inside the brackets they would be literal characters.
--   notes   : free text containing HTML markup (<br>, <sup>, <sub>), or "none".
--
-- The target is schema-qualified so the statement does not depend on the
-- session's default database (the table is created as motifs.protein_motifs).
-- String literals use double quotes, which requires that the ANSI_QUOTES
-- SQL mode is not enabled.
-- NOTE(review): ID is the table's only key, so re-running this script
-- inserts a second copy of every row.
insert into motifs.protein_motifs (motif, subtype, prosite, regex, notes)
values
    ("Serine/Threonine Kinases", "Cyclic-AMP dependent protein kinase", "-R-x(1,2)-[ST]-", "R.{1,2}[ST]", "none"),
    ("Serine/Threonine Kinases", "Cyclic-AMP dependent protein kinase", "-R-[RK*]-x-[ST]-Hpo", "R[RK].[ST][AVILMFGWP]", "K* instead of R results in lower efficiency<br>Hpo (hydrophobic residue) tends to be I, L, V"),
    ("Serine/Threonine Kinases", "Cyclic-GMP dependent protein kinase", "-R|K(1,3)-x(1,3)-[ST]-[RK](0,1)", "[RK]{1,3}.{1,3}[ST][RK]?", "none"),
    ("Serine/Threonine Kinases", "Akt (Protein kinase B or RAC)", "-R-x-R-x(2)-[ST]-Hpo-", "R.R.{2}[ST][AVILMFGWP]", "Hpo = hydrophobic residue"),
    ("Serine/Threonine Kinases", "Protein kinase C", "-R|K(1,3)-x-[ST]-x-[RK](1,3)-", "[RK]{1,3}.[ST].[RK]{1,3}", "x before S/T active site is uncharged; x after S/T is mainly hydrophobic"),
    ("Serine/Threonine Kinases", "Ca<sup>2+</sup> calmodulin-dependent protein kinase II", "-R-x(2)-[ST]-Hpo-", "R.{2}[ST][AVILMFGWP]", "V preferred for Hpo (hydrophobic residue)"),
    ("Serine/Threonine Kinases", "Ribosomal protein kinases p70S6 and p90s6 (MAPKAP kinase-1)", "-[RK]-x-R-x(2)-S-Hpo-", "[RK].R.{2}S[AVILMFGWP]", "none"),
    ("Serine/Threonine Kinases", "MAPKAP kinase-2", "-Hpo-x-R-x(2)-S", "[AVILMFGWP].R.{2}S", "none"),
    ("Serine/Threonine Kinases", "Phosphorylase kinase", "-[RK]-x(2)-S-Hpo-", "[RK].{2}S[AVILMFGWP]", "Hpo (hydrophobic) is generally V or I"),
    ("Serine/Threonine Kinases", "Myosin light chain kinase", "-K|R(3)-x(2)-R-x(2)-S-", "[KR]{3}.{2}R.{2}S", "none"),
    ("Serine/Threonine Kinases", "AMP-activated kinase (SNF1)", "-[MVLI]-[HRK]x(2)-x-[ST]-x(3)-[MVLI]-", "[MVLI].{3}[ST].{3}[MVLI]", "[HRK] basic residue at position 2, 3, or 4 of consensus sequence"),
    ("Serine/Threonine Kinases", "Proline-directed/cell-cycle kinases", "(K)-[ST]-P-x-[KR]-", "K?[ST]P.[KR]", "(K) is usually present but not invariant"),
    ("Serine/Threonine Kinases", "Proline-directed/cell-cycle kinases", "(K)-[ST]-P-[KR](1,2)", "K?[ST]P[KR]{1,2}", "(K) is usually present but not invariant"),
    ("Serine/Threonine Kinases", "Proline-directed/MAP kinase family", "-(P)-(L)-P-x-[ST]-P-(P)", "P?L?P.[ST]P{1,2}", "(P) and (L) usually present but not invariant"),
    ("Serine/Threonine Kinases", "Ceramide-activated kinase", "-P-L(0,1)-T-L-P", "PL?TLP", "none"),
    ("Serine/Threonine Kinases", "Ceramide-activated kinase", "-P-L(1,2)-T-P-", "PL{1,2}TP", "none"),
    ("Serine/Threonine Kinases", "Glycogen synthase kinase-3", "-[ST]-x(3)-pS*-", "[ST].{3}S", "first x is usually P<br>pS = phosphoserine"),
    ("Serine/Threonine Kinases", "Casein kinase I", "-p[ST]*-x(2)-[ST]-", "[STDE].{2}[ST]", "p[ST]* is phosphorylated [ST]- can also be [DE] (poorer substrate)"),
    ("Serine/Threonine Kinases", "Casein kinase II", "-[ST]-[DE|pSpY](5)", "[ST][DESY]{5}", "[DE] or phosphorylated serine/tyrosine - acidic residues"),
    ("Serine/Threonine Kinases", "Golgi casein kinase", "-S-x-[E|pS]-", "S.[ES]", "pS = phosphoserine"),
    ("Serine/Threonine Kinases", "2-Oxoacid dehydrogenase kinase", "-S-x(1,2)-[DE]-", "S.{1,2}[DE]", "none"),
    ("Serine/Threonine Kinases", "Light harvesting complex II kinase", "-R-K-[STA](2)-T-[STAK]-x-(K)-", "RK[STA]{2}T[STAK].K?", "none"),
    ("Serine/Threonine Kinases", "NIMA kinase family", "-R-F-R(1,2)-x-[ST]-[R|Hpo](2)-Hpo(2)-", "RFR{1,2}.[ST]R?[AVILMFGWP]{2,4}", "Hpo = hydrophobic residue"),
    ("Serine/Threonine Kinases", "Autophosphorylation-dependent kinase (p21-activated kinase-2, PAK2)", "-R-x(1,2)-[ST]-x(3)-[ST]-", "R.{1,2}[ST].{3}[ST]", "none"),
    ("Tyrosine-Protein Kinases", "Receptor tyrosine kinase", "-Pol(1,3)-E-Y-Hpo(3)-", "[STHNQEDKR]{1,3}EY[AVILMFGWP]{3}", "Pol = hydrophilic, Hpo = hydrophobic"),
    ("Tyrosine-Protein Kinases", "Cytosolic/intracellular tyrosine kinase", "-Pol(1,3)-[IV]-Y-[EG]-E-Hpo-", "[STHNQEDKR]{1,3}[IV]Y[EG]E[AVILMFGWP]", "Pol = hydrophilic, Hpo = hydrophobic"),
    ("Tyrosine-Protein Kinases", "Cytosolic/intracellular tyrosine kinase (c-Abl)", "-Hpo-Y-A(2)-P-", "[AVILMFGWP]YA{2}P", "Hpo = hydrophobic"),
    ("Dual Specificity Kinases", "MAP kinase kinase (Mek family)", "-T-E-Y-", "TEY", "both T and Y are phosphorylated"),
    ("Cofactors and Prosthetic Group Attachment Sites", "N-Glycosylation", "-N-x-[ST]-", "N[^DP][ST]", "x is rarely P or D"),
    ("Cofactors and Prosthetic Group Attachment Sites", "Attachment site for fucosyl residues", "-C-x(2)-G(2)-[TS]-C-", "C.{2}G{2}[ST]C", "none"),
    ("Cofactors and Prosthetic Group Attachment Sites", "Mucin-type-O-glycosylation", "-x-T-P-x-P-", "[^DEHKR]TP[PGASHKR]P", "position 1 x: uncharged residues preferred<br>position 4 x: small or basic residues preferred"),
    ("Cofactors and Prosthetic Group Attachment Sites", "Glucosaminoglycans", "[DNEQ]-x-[DNEQ]-x-S-G-x-G-", "([DNEQ].){2}SG.G", "none"),
    ("Cofactors and Prosthetic Group Attachment Sites", "Tryptophan-Glycosylation", "-W-x(2)-W-", "W.{2}[WF]", "mutagenesis to -W-x(2)-F- reduces reaction threefold"),
    ("Tyrosine Sulfation", "none", "-Neg(1,5)-Y-Neg(1,5)-", ".{4}[DE]Y.{5}", "at least 3 negative residues (D, E, TyrSO<sub>4</sub>) before and after active site Y<br>residue (mostly D) at position right before Y is strongest determinant"),
    ("Phosphopantetheine Binding Site", "none", "-L-G-x-D-S-[LIT]", "LG.DS[LIT]", "none"),
    ("Phosphopantetheine Binding Site", "none", "-Hpo-G-Hpo-[DK]-S-Hpo", "[AVILMFGWP]G[AVILMFGWP][DK]S[AVILMFGWP]", "Hpo = hydrophobic residue"),
    ("Biotinyl-Lysine", "none", "-[AIV]-M-K-M*-", "[AIV]MK[MA]", "second methionine replaced by alanine in one exception"),
    ("Lipoyl-Lysine Binding Site", "none", "-Hpo-E-[TS]-D-K-A-x-Hpo-[DEG]-Hpo", "[ILMV]E[TS]DKA.[ILMV][DEG][AVILMFGWP]", "first 2 Hpo = nonaromatic hydrophobic residue; last Hpo is any hydrophobic"),
    ("Bilin Attachment", "none", "-A-x-C-Hpo-R-D-", "A.C[AVILMFGWP]RD", "none"),
    ("Dipyrromethane Cofactor", "none", "-G-[SAG]-C-x-V-P-", "G[SAG]C.VP", "none"),
    ("FAD Binding Site", "none", "-R-S-H-[ST]-x(2)-A-x-G(2)-", "RSH[ST].{2}A.G{2}", "none"),
    ("Hypusine Attachment (in eIF-5A)", "none", "-T-G-K-H-G-x-A-K-", "TGKHG.AK", "none"),
    ("Retinal Binding", "none", "-L-D-[LIVM]-x-A-K-x(2)-[WYF]-", "LD[LIVM].AK.{2}[WYF]", "none"),
    ("Carbamoyl-Phosphate Binding", "none", "-F-x-[EK]-S-[GT]-R-T-", "F.[EK]S[GT]RT", "none"),
    ("Phosphohistidine Active Sites", "ATP-Citrate Lyase and Succinyl CoA Ligase", "-[LIVM]-G-H-A-G-A-", "[LIVM]GHAGA", "none"),
    ("Phosphohistidine Active Sites", "Phosphoglycerate Mutase", "-[LIVM]-x-R-H-G-x(3)-N-", "[LIVM].RHG.{3}N", "none"),
    ("Acyl-Phosphate in Phosphomutases and Phosphatases", "none", "-D-V-D-x-[TV]-", "DVD.[TV]", "first D is the acyl-phosphate"),
    ("N-Terminal Consensus Sequences", "N-Terminal Myristoylation/Palmitoylation", "M-G-x(3)-[STAGCN]-x–", "^MG[^P].{2}[STAGCN][^P]", "initiator M is removed; first and last x are not proline<br>if first x is cysteine (i.e., M-G-C-), protein is also palmitoylated"),
    ("N-Terminal Consensus Sequences", "N-Terminal Methylation (eukaryotes)", "(Me)3-A-P-K- and (Me)2-P-P-K-", "^[AP]PK", "di- or tri- methylation"),
    ("N-Terminal Consensus Sequences", "N-Terminal Methylation (bacterial pilins)", "-G-[MF]*-[ST]-[LT]-Hpo-E-", "G[MFLIVY][ST][LT][AVILMFGWP]E", "*site of methylation; [LIVY] can also occur<br>Hpo = hydrophobic residue"),
    ("N-Terminal Consensus Sequences", "Bacterial Lipoprotein Glyceride Cysteine Thioethers", "-L-A-G*C-S(2)-N-", "L[SAN][SANG]GC[SAN]{2}N", "*cleavage site; lipid is attached to cysteine residue, which becomes the amino terminus"),
    ("C-Terminal Consensus Sequences", "Amidation", "-x(2)-G*[RK](2)-", ".{2}G[RK]{2}$", "*cleavage site; G is processed to an amide"),
    ("C-Terminal Consensus Sequences", "Amidation", "-x*Q-x-P-G*x-", ".Q.PG.$", "*cleavage sites; Q is cyclized to pyroglutamate<br>thyrothropin releasing hormone and related peptide precursors"),
    ("C-Terminal Consensus Sequences", "C-A-A-X Boxes", "-C-[DNEQ]-[ILMV]-x CO<sub>2</sub>H", "C[DNEQ][ILMV].$", "if X is S, C, M, A, or Q, the C-terminus is farnesylated; if X is L or F, the protein is geranylated."),
    ("C-Terminal Consensus Sequences", "C-A-A-X Boxes", "-C-x-C CO<sub>2</sub>H", "C.C$", "YPT/rab proteins; latter Cys is isoprenylated (geranylated) and carboxymethylated"),
    ("C-Terminal Consensus Sequences", "C-A-A-X Boxes", "-C(2) CO<sub>2</sub>H", "C{2}$", "YPT/rab proteins; latter Cys is isoprenylated (geranylated)"),
    ("Metal Binding Motifs", "Calcium-Binding (Calmodulin-Binding IQ Motif)", "-I-Q-x(3)-R-G-x(3)-R-x(2)-[YW]-", "[AVILMFGWP]Q.{3}R", "Ile can be another hydrophobic residue<br>some proteins only have part of the motif -I-Q-x(3)-R(-G-x(3)-R)<br>frequently a PEST region (rich in Pro, Glu, Ser, and Thr) may be adjacent"),
    ("Metal Binding Motifs", "Annexins (Ca<sup>2+</sup>-Dependent Phospho- lipid Binding)", "-K-G-Hpo-G-T-D-E-x-[SATC]-[LI]-[ILVT]-x-[ILV]-[ILM]-[CATV]-x-R-[ST]-x(26)-[DE]-", "KG[AVILMFGWP]GTDE.[SATC][LI][ILVT].[ILV][ILM][CATV].R[ST].{26}[DE]", "consensus for Ca<sup>2+</sup>-binding: -G-T-D-E-"),
    ("Metal Binding Motifs", "gamma-Carboxyglutamic Acid (Gla)-Containing Proteins", "-Gla-x(3)-Gla-x-C-", "E.{3}E.C", "none"),
    ("Zinc Fingers", "C<sub>2</sub>H<sub>2</sub> class", "-[FY]-x-C-x(2,5)-C-x(3)-Hpo-x(5)-L-x(2)-H-x(2,5)-H-", "[FY].C.{2,5}C.{3}[AVILMFGWP].{5}L.{2}H.{2,5}H", "Hpo = hydrophobic residue (usually Phe/F)"),
    ("Zinc Fingers", "GATA/C<sub>4</sub> type", "-C-x(2)-C-x(17)-C-x(2)-C-", "C.{2}C.{17}C.{2}C", "none"),
    ("Zinc Fingers", "GATA type", "-C-x-N-C-x(4)-T-x-L-W-R-[RK]-x(3)-G-x(3)-C-N-A-C-", "C.NC.{4}T.LWR[RK].{3}G.{3}CNAC", "none"),
    ("Zinc Fingers", "C<sub>4</sub> type", "-C-[DES]-x-C-x(3)-I-x(3)-R-x(4)-P-x(4)-C-x(2)-C-", "C[DES].C.{3}I.{3}R.{4}P.{4}C.{2}C", "none"),
    ("Zinc Fingers", "C<sub>4</sub> steroid finger", "-C-x(2)-C-x-[DE]-x(5)-H-[FY]-x(4)-C-x(2)-C-x(15,17)-C-x(5)-C-x(9)-C-x(2)-C-x(4)-C-", "C.{2}C.[DE].{5}H[FY].{4}C.{2}C.{15,17}C.{5}C.{9}C.{2}C.{4}C", "none"),
    ("Zinc Fingers", "LIM domains (in homeobox and other proteins)", "-C-x(2)-C-x(16,22)-Hpo-H-x(2)-[CH]-x(2)-C-x(2)-C-x(16,21)-C-x(2,3)-[CHD]-", "C.{2}C.{16,22}[AVILMFGWP]H.{2}[CH].{2}C.{2}C.{16,21}C.{2,3}[CHD]", "Hpo = hydrophobic residue"),
    ("Zinc Fingers", "Ring finger (C<sub>3</sub>H<sub>4</sub> type)", "-C-x-Hpo-C-x(9,27)-C-x-H-x(2)-C-x(2)-C-x(6,17)-C-P-x-C-", "C.[AVILMFGWP]C.{9,27}C.H.{2}C.{2}C.{6,17}CP.C", "Hpo = hydrophobic residue"),
    ("Zinc Fingers", "GAL4/fungal Zn2Cys6 binuclear cluster", "-C-x(2)-C-x(3)-[HRK]-x(2)-C-x(5,9)-C-x(2)-C-x(6,8)-C-", "C.{2}C.{3}[HRK].{2}C.{5,9}C.{2}C.{6,8}C", "none"),
    ("Zinc Fingers", "TFIIS zinc ribbon", "-C-x(2)-C-x(9)-[ILVM]-Q-T-R-[STA]-x-D-E-P-x(6)-C-x(2)-C-", "C.{2}C.{9}[ILVM]QTR[STA].DEP.{6}C.{2}C", "none"),
    ("Zinc Fingers", "Glo family", "-C-x(2)-C-x(16)-C-x(2)-C-", "C.{2}C.{16}C.{2}C", "none"),
    ("Zinc Fingers", "B box", "-C-x(2)-H-x(7)-C-x(7)-C-x(2)-C-x(5)-H-x(2)-H-", "C.{2}H.{7}C.{7}C.{2}C.{5}H.{2}H", "none"),
    ("Zinc Fingers", "B box: BIR (apoptosis inhibitory proteins)", "–C-x(2)-C-x(16)-H-x(6)-C-", "C.{2}C.{16}H.{6}C", "none"),
    ("Zinc Fingers", "B box: FYVE (endosomal localization)", "-C-x(2)-C-x(12)-C-x(2)-C-x(4)-C-x(2)-C-x(16)-C-x(2)-C-", "C.{2}C.{12}C.{2}C.{4}C.{2}C.{16}C.{2}C", "none"),
    ("Zinc Fingers", "Diacylglycerol/Phorbol Ester Binding", "-H-x-Hpo-x(10,11)-C-x(2)-C-x(3)-Hpo-x(2,7)-[AG]-Hpo-x-C-x(2)-C-x(4)-H-x(2)-C-x(6,7)-C-", "H.[AVILMFGWP].{10,11}C.{2}C.{3}[AVILMFGWP].{2,7}[AG][AVILMFGWP].C.{2}C.{4}H.{2}C.{6,7}C", "Hpo = hydrophobic (mainly Phe/F)"),
    ("Zinc Fingers", "Thermolysin (“Metzincin”-Type)", "-H-E-x(2)-H-x(2)-G-x(2)-H-", "HE.{2}H.{2}G.{2}H", "none"),
    ("Zinc Fingers", "Histidine Triad (HIT) Family", "-N-x-[GE]-x(2)-[GA]-x-Q-[TSE]-[VI]-x-H-[LVST]-H-Hpo-H-[LVI]-[LIF]-", "N.[GE].{2}[GA].Q[TSE][VI].H[LVST]H[AVILMFGWP]H[LVI][LIF]", "Hpo = hydrophobic residue"),
    ("Iron-Binding Motifs", "Iron-Sulfur Proteins: [2Fe-2S] cluster", "-C-x(4)-C-x(2)-C-x(n)-C-", "C.{4}C.{2}C.+C", "none"),
    ("Iron-Binding Motifs", "Iron-Sulfur Proteins: [4Fe-4S] cluster", "-C-x(2)-C-x(2)-C-x(3)-C-P*-", "C.{2}C.{2}C.{3}C[PGE]", "*can also be G or E in two exceptions"),
    ("Iron-Binding Motifs", "Reiske Iron-Sulfur Proteins", "-C-T-H-L-G-C-[LIV]-x(n)-C-P-C-H-G-S-", "CTHLGC[LIV].+CPCHGS", "none"),
    ("Iron-Binding Motifs", "Rubredoxin Turn (Knuckle)", "-[ILMV]-x(3)-W-x-C-P-x-C-[GAD]-", "[ILMV].{3}W.CP.C[GAD]", "none"),
    ("Iron-Binding Motifs", "Covalent Haem-Binding Site (in c-Type Cytochromes)", "-C-x(2)-C-H-", "C.{2}CH", "none"),
    ("Copper Binding", "Type I Copper Binding", "-H-N-x(4,39)-Y-x-[YF](2)-C-x-P-H-x(2,6)-M-", "HN.{4,39}Y.[YF]{2}C.PH.{2,6}M", "none"),
    ("Copper Binding", "Tyrosinase", "-W-H-R-", "WHR", "none"),
    ("Metal-Binding Sites", "Dehydrogenases (metals with 2 oxidation states)", "-H-x-[LIM]-x-H-x(9)-H-G-", "H.[LIM].H.{9}HG", "none"),
    ("Metal-Binding Sites", "Nickel-dependent hydrogenase", "-C-x(2)-C-", "C.{2}C", "none"),
    ("Metal-Binding Sites", "Metallothionein", "-C-x-C-x(3)-C-x-C-", "C.C.{3}C.C", "none"),
    ("Metal-Binding Sites", "Metallothionein", "-C-x(3)-C-x-C-x(2)-C-x-C-x(2)-C-", "C.{3}C.C.{2}C.C.{2}C", "none"),
    ("Metal-Binding Sites", "Phospholipase A<sub>2</sub>", "-C(2)-x(2)-H-x(2)-C-", "C{2}.{2}H.{2}C", "none"),
    ("Metal-Binding Sites", "Aminopeptidase", "-N-T-D-A-E-G-R-L-", "NTDAEGRL", "none"),
    ("Metal-Binding Sites", "Pyruvate kinase", "[LIVM](2)-x-K-[LIVM]-E-[NR]-x-[EQ]-[GA]", "[LIVM]{2}.K[LIVM]E[NR].[EQ][GA]", "K: active site, E: Mg<sup>2+</sup> ligand"),
    ("Redox Proteins", "Thioredoxins", "-W-C-G-P-C-", ".C.{2}C", "residues other than W, G, P can occur"),
    ("Redox Proteins", "Glutaredoxin", "-C-P-[FYW]-C-x(2)-[TA]-", "CP[FYW]C.{2}[TA]", "none"),
    ("Proteases, Esterases, and Serine Active Sites", "general", "-G-x-S-x-G-", "G.S.G", "includes lipases and acetylcholine esterases"),
    ("Proteases", "Serine Proteases", "-D-...-H-...-S-", "D.+H.+G.S", "catalytic triad Asp...His...Ser, variable residues in between"),
    ("Proteases", "ATP-Dependent Serine Proteases (Lon Family)", "-D-G-[PD]-S-A-[GS]-Hpo-[TA]-[LIVM]-", "DG[PD]SA[GS][AGP][TA][LIVM]", "Hpo = nonaromatic small hydrophobic residue"),
    ("Proteases", "Cysteine (Thiol) Proteinases", "-G*-x-C-[WY]-...-H-[GSTACE]-[LIVM]-...-N-[ST]-W-", "[GE].C[WY].+H[GSTACE][LIVM].+N[ST]W", "*E instead of G in papaya protease"),
    ("Proteases", "Aspartate Proteinases", "F-D-[TS]-G-S-...-[ILV]-V-D-T-G-", "FD[TS]GS.+[ILV]VDTG", "none"),
    ("Proteases", "Aspartate Proteinases (retroviruses)", "-L-V-D-[TS]-G-A-...-[ILV]-G-R-D-", "LVD[TS]GA.+[ILV]GRD", "none"),
    ("Proteases", "Thioesterase Active Site Serine", "-G-x-S-x-G-...-G-B-H-x(2)-L-", "G.S.G.+GBH.{2}L", "none"),
    ("Proteases", "Cysteine Switch", "-P-R-C-[GN]-x-P-[DR]-", "PRC[GN].P[DR]", "mammalian extracellular metalloproteinases (matrixins)"),
    ("Proteolytic Processing Motifs", "Signal Peptidase Cleavage Sites", "-[AGSLIV]-x-[AGS]-", "[AGSLIV].[AGS]", "cleavage site after [AGS]"),
    ("Proteolytic Processing Motifs", "Prohormone Processing", "-K-R-x-", "KR.", "none"),
    ("Proteolytic Processing Motifs", "Prohormone Processing", "x-R-K-", ".RK", "none"),
    ("Proteolytic Processing Motifs", "Prohormone Processing", "-R(2)-x-", "R{2}.", "none"),
    ("Nucleotide-Binding Proteins", "General (P-loop)", "-G-x(4)-G-[K|Hpo]-", "G.{4}G[KAVILMFGWP]", "normally additional third Gly(G) found at x3/x4"),
    ("Nucleotide-Binding Proteins", "Guanine Nucleotide Binding", "-G-x-G-K-x(40,80)-D-x(2)-G-x(40,80)-N-K-x-[DW]-", "G.GK.{40,80}D.{2}G.{40,80}NK.[DW]", "last 4 residues comprise the nucleotide specificity region<br>if -N-K-x-W-, both ITP and GTP may be utilized"),
    ("Nucleotide-Binding Proteins", "G-Protein Consensus Sequence", "-K-[LIV](4)-G-[AGN]-G-[GE]-[VS]-G-K-S-...-D-x(2)-G-...-N-K-x-D-", "K[LIV]{4}G[AGN]G[GE][VS]GKS.+D.{2}G.+NK.D", "none"),
    ("Nucleotide-Binding Proteins", "Protein Synthesis Initiation and Elongation Factors", "-G-H-[IV]-D-[HSA]-G-K-[TS]-...-D-[CAST]-P-G-H...-N-K-[MCVE]-D-", "GH[IV]D[HSA]GK[TS].+D[CAST]PGH.+NK[MCVE]D", "none"),
    ("Nucleotide-Binding Proteins", "Dinucleotide Binding Proteins", "-G-x-G-x(2)-G-Hpo-", "G.G.{2}G[AVILMFGWP]", "Hpo = hydrophobic residue"),
    ("Nucleotide-Binding Proteins", "Mononucleotide Binding Proteins", "-G-x(2)-G-x-G-K-[ST]-", "G.{2}G.GK[ST]", "none"),
    ("Nucleotide-Binding Proteins", "Protein Kinase Catalytic Domain Con- sensus Sequences", "-[LVI]-G-x-G-x-[YF]-G-x-V-x(9–26)-A-x-K-x-Hpo-x(n)-D-F-G-x(1,20)-A-P-E-", "[LVI]G.G.[YF]G.V.{9,26}A.K.[AVILMFGWP].+DFG.+APE", "Hpo = hydrophobic residue<br>sequence before x(9,26) is nucleotide-binding site; sequence after is catalytic domain"),
    ("Nucleotide-Binding Proteins", "Consensus for Serine/Threonine Kinase Specificity", "-D-L-K-P-E-N-", "DLKPEN", "none"),
    ("Nucleotide-Binding Proteins", "Consensus for Serine/Threonine Kinase Specificity", "-G-[TS]-x(2)-[TF]-x-A-P-E-", "G[TS].{2}[TF].APE", "none"),
    ("Nucleotide-Binding Proteins", "Consensus for Protein-Tyrosine Kinase Specificity", "-D-L-A-A-R-N-", "DLAARN", "none"),
    ("Nucleotide-Binding Proteins", "Consensus for Protein-Tyrosine Kinase Specificity", "-D-L-R-A-A-N-", "DLRAAN", "Src viral tyrosine kinase"),
    ("Nucleotide-Binding Proteins", "Consensus for Protein-Tyrosine Kinase Specificity", "-P-[IV]-[LR]-W-[TM]-A-P-E-", "P[IV][LR]W[TM]APE", "none"),
    ("Nucleotide-Binding Proteins", "DEAD- and DEAH-Box ATP-Dependent Helicases", "-Hpo(2)-D-E-A-D-[KREN]-", "[AVILMFGWP]{2}DEAD[KREN]", "Hpo = hydrophobic residue"),
    ("Nucleotide-Binding Proteins", "DEAD- and DEAH-Box ATP-Dependent Helicases", "-Hpo(3)-D-E-[ALIV]-H-[CREN]-", "[AVILMFGWP]{3}DE[ALIV]H[CREN]", "Hpo = hydrophobic residue"),
    ("Protein Phophatases Active Sites and Interaction Motifs", "Tyrosine-Specific Phosphatase Active Site Cysteine", "-Hpo-[HV]-C-x(2)-G-x(2)-R-[ST]-[STAG]-", "[AVILMFGWP][HV]C.{2}G.{2}R[ST][STAG]", "Hpo = hydrophobic residue"),
    ("Protein Phophatases Active Sites and Interaction Motifs", "Protein Phosphatase –2C", "-Hpo(2)-[GSAC]-[LIMV]-D-G-H-[GAV]-", "[AVILMFGWP]{2}[GSAC][LIMV]DGH[GAV]", "Hpo = hydrophobic residue"),
    ("Protein Phophatases Active Sites and Interaction Motifs", "Binding Domain of Protein Phosphatase 1 Catalytic Subunit (PP1-c) Inhibitory and Targeting Proteins", "-[RK](3)-[IV]-[QS]-F-", "[RK]{3}[IV][QS]F", "none"),
    ("Protein Phophatases Active Sites and Interaction Motifs", "Protein Phosphatase-1 “Inhibitor 2” Type Binding Motif", "-F-E-x(2)-R-K-", "FE.{2}RK", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH2 Domain Binding", "pY-x(2)-Hpo-", "Y.{2}[AVILMFGWP]", "Hpo = hydrophobic residue<br>pY = phosphotyrosine"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH3 Domains, Class I", "-P{1,2}-x-P-", "P{1,2}.P", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH3 Domains, Class II", "-P(2)-L-P-x-R-", "P{2}LP.R", "e.g., Src"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH3 Domains, Class II (Abl)", "-P-x(2)-P(3)-Hpo-x-P-", "P.{2}P{3}[AVILMFGWP].P", "Hpo = hydrophobic residue"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH3 Domains, Class II (amphyphysin)", "-P-x-R-P-x-[RH](2)-", "P.RP.[RH]{2}", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "SH3 Domains, Class II ('Homer'-Related Synaptic Proteins Binding to Glutamate and IP3 Receptors)", "-P(2)-x(2)-F-R-", "P{2}.{2}FR", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "WD-40 Repeats (beta-Transducin Repeats)", "-Hpo-x-{HRKDE}-x(1,6)-Hpo(2)-[ST]-[GA](2)-x-[DN]-x(2)-Hpo-x-[ILV]-W-D-", "[AVILMFGWP].[^HRKDE].{1,6}[AVILMFGWP]{2}[ST][GA]{2}.[DN].{2}[AVILMFGWP].[ILV]WD", "Hpo = hydrophobic residue<br><br>{HRKDE} = any uncharged residue"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "PH (Pleckstrin) Homology Domain", "-V-[IV]-K-E-G-Y-L-K(3)-G-S-x(n)-K-S-W-K-R(2)-[YW]-F-V-L-[RT]-[DE]-x(n)-L-S-Y(2)-K-D-S-x(n)-P-K-G-[LS]-I-[DP]-L-E-[NG]-[IC]-Q-[IV]-V-E-V-E-D-x(n)-K-H-C-F-E-I-V-T-[KP]-D-G-x(n)-L-[IL]-L-Q-A-[ES]-S-E(3)-R-[EQ]-E-W-[VI]-[AK]-A-[LI]-[RQ]-R-A-I-", "V[IV]KEGYLK{3}GS.+KSWKR{2}[YW]FVL[RT][DE].+LSY{2}KDS.+PKG[LS]I[DP]LE[NG][IC]Q[IV]VEVED.+KHCFEIVT[KP]DG.+L[IL]LQA[ES]SE{3}R[EQ]EW[VI][AK]A[LI][RQ]RAI", "domain of ~100 residues that binds to phosphatidylinositols; more uncommon variations may not be captured"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Phosphotyrosine-Binding (PTB/PI) Domain", "-Hpo-x-N-P-x-pY-", "[AVILMFGWP].NP.Y", "Hpo = hydrophobic residue, pY = phosphotyrosine<br>Shc and IRS-1"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Phosphotyrosine-Binding (PTB/PI) Domain", " -D-[ND]-x-pY-", "D[ND].Y", "Cbl protooncogene"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "PDZ domain binding (Group 1)", "-E-S/T-D-V-", "E[ST]DV$", "-CO<sub>2</sub>H after Val"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "PDZ domain binding (Group 2)", "-[FY]-Y-[VIA]-", "[FY]Y[VIA]$", "-CO<sub>2</sub>H after [VIA]"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "WW Domain", "x(2)-L*-P*-x-G*-W-E*-x(6,7)-G*-x(2)-[FY](2)-Hpo-N*-H*-x-T*-x-T*(2)-[TQSCR]-W-x(2)-P-x(6)", ".{2}[LIVMF][PGAS].[GASPED]W[DNEQ].{6,7}[GASPED].{2}[FY]{2}[AVILMFGWP][DNEQRK][HRKYNQDE].[STAN].[TSAN]{2}[TQSCR]W.{2}P.{6}", "domain composed of 38-40 residues<br>*residues may be replaced by those with similar properties<br>Hpo = hydrophobic residue"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "WW Domain Binding", "-[AP]-P(2)-x-Y-", "[AP]P{2}.Y", "first type"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "WW Domain Binding", "–P(2)-L-P-", "PPLP", "second type"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "WW Domain Binding", "–P(3)-G-M-R-P(2)-", "PPPGMRPP", "third type, Pro/Gly/Met-rich"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Protein Interaction Motif", "-R-S-x-pS-x-P-", "RS.S.P", "pS = phosphorylated serine"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Protein Interaction Motif", "-R-x-[YF]-pS-x-P-", "R.[YF]S.P", "pS = phosphorylated serine"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Caveolin Binding Motif", "-[FYW]-x(4)-[FYW]-x(2)-[FYW]-", "[FYW].{4}[FYW].{2}[FYW]", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Cyclic AMP Dependent-Kinase Anchor Proteins", "-[LI]-E-[TE]-[AK]-[SA]-[KR]-[LI]-V-[QDK]-[NA]-[IAV]-I-[QE]-", "[LI]E[TE][AK][SA][KR][LI]V[QDK][NA][IAV]I[QE]", "none"),
    ("Binding Domains and Motifs in Signal Transduction Proteins", "Suppressor of Cytokines Signalling Box", "–Hpo-x-[TSP]-L-Q-H-Hpo-L-C-R-x(2)-[ILV]-x(3)-Hpo-x(2,10)-Hpo-x(2)-L-P-L*-P-x(2)-Hpo-x-D*-Y-L-x(1,3)-Y-", "[AVILMFGWP].[TSP]LQH[AVILMFGWP]LCR.{2}[ILV].{3}[AVILMFGWP].{2,10}[AVILMFGWP].{2}LP[LIVMF]P.{2}[AVILMFGWP].[DNEQ]YL.{1,3}Y", "*residues are not invariant<br>Hpo = hydrophobic residue"),
    ("Cell Death/Apoptosis Motifs", "BH3 motif", "–L-[RA]-x-I*-G-D-[ED]-Hpo-[DEN]-", "L[RA].[AVILMFGWP]GD[ED][AVILMFGWP][DEN]", "*other hydrophobics (Hpo) may occur"),
    ("Cell Death/Apoptosis Motifs", "BH1 domain", "-N-W-G-R-", "NWGR", "none"),
    ("Cell Death/Apoptosis Motifs", "TRAF Binding Motif", "-P-x-Q-x-[TS]-", "P.Q.[TS]", "interacts with the family of Tumour necrosis factor Receptor-Associated Factors"),
    ("Protein-Protein Interaction Motifs", "Actin-Binding", "-L-x(2)-I-G-x(3)-[IL]-V-D(2)-[ASN]-I-K(3)-[LMF]-L-G-L-I-W-x(2)-I-L-", "L.{2}IG.{3}[IL]VD{2}[ASN]IK{3}[LMF]LGLIW.{2}IL", "none"),
    ("Protein-Protein Interaction Motifs", "Methionine Bristles in the Signal Recognition Particle", "–F-T-L-x(2)-Hpo-R-x-Q-M-x(2)-M-[RK](2)-M-G-P-M-x(2)-Hpo(2)-x-M-[LI]-P-G-M-G-x(1,2)-M-P-", "FTL.{2}[AVILMFGWP]R.Q[MLF].{2}[MLF][RK]{2}[MLF]GP[MLF].{2}[AVILMFGWP]{2}.[MLF][LI]PG[MLF]G.{1,2}[MLF]P", "other hydrophobic residues (Leu or Phe) may replace Met; possible variations in other residues"),
    ("Protein-Protein Interaction Motifs", "Collagen", "-G-[PA]-[PK]-", "G[PA][PK]", "[PK] = hydroxyproline or hydroxylysine"),
    ("Protein-Protein Interaction Motifs", "Ankyrin Repeat", "-G-x-T-[PA]-L-H-A(2)-x(7)-[VA]-x(2)-L(2)-x(2)-G-A-x(2,6)-[DN]-", "G.T[PA]LHA{2}.{7}[VA].{2}L{2}.{2}GA.{2,6}[DN]", "none"),
    ("Protein-Protein Interaction Motifs", "G-Protein Coupled Receptors of the Rhodopsin-Family", "-[ND]-P-x(2)-Y-", "[ND]P.{2}Y", "none"),
    ("DNA–Protein Binding", "A+T Hook", "-[RK]-[PKGR]-R-G-R-P-[RPK]-[KG]-", "[RK][PKGR]RGRP[RPK][KG]", "none"),
    ("DNA–Protein Binding", "Hox, Homeo Domain", "-I-Y-P-W-M-K-", "IYPWMK", "this motif is upstream of the 60-residue homeo domain"),
    ("Eukaryotic Transcription Regulation Motif", "Leucine Zipper", "-K-x(1,6)-L-x(1,6)-L-x(1,6)-L-x(1,6)-L-x(1,6)-L-", "K.{1,6}L.{1,6}L.{1,6}L.{1,6}L.{1,6}L", "none"),
    ("DNA–Protein Binding", "Coactivator Motif", "–L-x(2)-L(2)-", "L.{2}L{2}", "two repeats of a motif with –L-x(2)-L(2)- core"),
    ("Protein-Carbohydrate Binding", "Sugar-Binding in Galectins (Galactoside-Binding Lectins)", "-W-G-x-E-x-[RK]-", "WG.E.[RKI]", "galectin-8 if last residue is I"),
    ("Protein-Carbohydrate Binding", "Heparin-Binding Motif", "-K-K-T-R-", "KKTR", "none"),
    ("Protein-Carbohydrate Binding", "Glucose Transporters", "-Q-L-S-", "QLS", "GLUT1, GLUT3, and GLUT4"),
    ("Protein-Carbohydrate Binding", "Glucose Transporters", "H-V-A-", "HVA", "GLUT2 (glucose/fructose transporter)"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-[RK]-D-E-L CO<sub>2</sub>H", "[RK]DEL$", "Vertebrates"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-K-N-E-L", "KNEL", "Vertebrates - partial retention (efficient retention motif: [RK]DEL)"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-D-K-E-L", "DKEL", "Vertebrates - partial retention (efficient retention motif: [RK]DEL)"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-K-E-E-L", "KEEL", "Vertebrates - partial retention (efficient retention motif: [RK]DEL)"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-K-[DT]-E-L", "K[DT]EL", "-K-D-E-L for Drosophila and C. elegans<br>-K-T-E-L for viruses"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-[HA]-D-E-L", "[HA]DEL", "H for budding yeast<br>A for fission yeast"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Endoplasmic Reticulum- Retention Signal", "-[KH]-D-E-L", "[KH]DEL", "Plants; -H-D-E-L-CO<sub>2</sub>H"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Nuclear Localization Signal", "-[KR](2)-x(10)-K*-", "[KR]{2}.{10}K", "*2-3 of the 4 residues after K should also be K (3-4 K's out of 5 residues)"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Nuclear Export Signal", "-L-x(2,3)-L-x(2)-L(2)-", "L.{2,3}L.{2}L{2}", "none"),
    ("Membrane Protein Sorting Signals", "Tyrosine Motif", "-Y -x(2)-[LIMF]-", "Y.{2}[LIMF]", "involved in targeting of transmembrane proteins to lysosomes, endosomal compartments, and the trans-Golgi network"),
    ("Membrane Protein Sorting Signals", "C-terminal Di-Leucine Motif", "-D-x(3)-L(2)", "D.{3}LL", "involved in targeting of transmembrane proteins to lysosomes, endosomal compartments, and the trans-Golgi network"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "C-Terminal Peroxisome Targeting Signal (PTS1)", "-[SACKN]-[KRHQNS]-L-CO<sub>2</sub>H", "[SACKN][KRHQNS]L$", "none"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "glycosomal import targeting signal", "-[SACGHNP]-[KRSHMN]-[LIMY] -CO<sub>2</sub>H", "[SACGHNP][KRSHMN][LIMY]$", "none"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Nucleoporin Anchoring", "-x-F-x-F-G-", ".F.FG", "none"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Ca<sup>2+</sup>-Binding Parallel beta-Roll Motif", "-G(2)-x-G-x-D-", "GG.G.D", "In proteins secreted by Gram negative bacteria"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Cadherin (Cell–Cell Adhesion) Motif", "-L-D-R-E-x(4)-Y-x-L-", "LDRE.{4}Y.L", "none"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Cell Adhesion Motif", "-[RK]-G-D-", "[RK]GD", "-K-G-D- occurs in platelets and megakaryocytes, specific for the integrin GPIIb-IIIa"),
    ("Cell Targeting, Adhesion, and Cell Division Motifs", "Cell Division Motif", "-B-x(2)-C-x-[TES]-x(1,8)-[DE]-[EDTS]-[DE]-", "B.{2}C.[TES].{1,8}[DE][EDTS][DE]", "residues before x(1,8): beta-turn; residues after: alpha-helix"),
    ("Disulfide Bond Patterns", "EGF Domain", "-C-x(4,7)-C-x(2,3)-G-x-C-x(1,3)-[DN]-x(4)-[FY]-x-C-x-C-x(2)-G-[FYW]-x(0,20)-G-x(2)-C-", "C.{4,7}C.{2,3}G.C.{1,3}[DN].{4}[FY].C.C.{2}G[FYW].{0,20}G.{2}C", "[DN] may also be beta-hydroxy Asp/Asn<br>Cys at (1,3), (2,4), (5,6) are in disulfide bonds"),
    ("Disulfide Bond Patterns", "Pancreatic Trypsin Protease Inhibitor (Kunitz Domain) Superfamily", "-C-x(8)-C-x(6)-[FYW]-[FY](2)-x(6)-C-x(2)-F-x-[YW]-x-G-C-x(4)-N-x-F-x-[ST]-x(3)-C-x(3)-C-", "C.{8}C.{6}[FYW][FY]{2}.{6}C.{2}F.[YW].GC.{4}N.F.[ST].{3}C.{3}C", "Cys at (1,6), (2,4), (3,5) are in disulfide bonds"),
    ("Disulfide Bond Patterns", "Four-Disulfide Core, WAP-Type", "-C-P-x(10)-C-x(4)-C-x(2)-[DN]-x(2)-C-x(5)-C-C-x(3)-C-x(3)-C-", "CP.{10}C.{4}C.{2}[DN].{2}C.{5}CC.{3}C.{3}C", "Cys in (1,6), (2,7), (3,5), (4,8) are in disulfide bonds"),
    ("Disulfide Bond Patterns", "P-Domain (Trefoil Motif)", "-C-x(6)-R-x(2)-C-G-Hpo-x(3,4)-[ST]-x(3)-C-x(4)-C-C-Hpo-x(8)-W-C-[Hpo|H]-", "C.{6}R.{2}CG[VILMFW].{3,4}[ST].{3}C.{4}CC[VILMFW].{8}WC[VILMFWH]", "Hpo = large hydrophobic<br>Cys at (1,5), (2,4), (3,6) are in disulfide bonds");