-
Notifications
You must be signed in to change notification settings - Fork 4
/
rules-gv.txt
122 lines (122 loc) · 3.8 KB
/
rules-gv.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#######################################################
# First of all, find the correct Manx (Gaelg) form...
# Each rule line below has three fields: pattern, replacement, numerical code.
# numerical codes aren't used (from gramadoir):
# -1=capitalization changes, etc. that can be applied silently
# 0=derivational morphology 1=non-standard or pre-standard variants
# 2=errors
#######################################################
# NOTE(review): \L, \l and \u in the replacements look like Perl-style case
# escapes (lowercase the rest / lowercase next char / uppercase next char);
# confirm against the code that applies these rules.
# All-caps words: keep the first letter, lowercase the rest.
# Initial D, H, M, N and T are left out of this first character class and are
# covered by the dedicated rules that follow.
^([A-CEFGIJKLOPQRSU-ZÁÉÍÓÚÀÈÌÒÙÇ])([A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*[A-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ $1\L$2 -1
# All-caps words starting with D, H, M, N or T. The plain forms keep the
# initial capital; the D', DH', H- and M' prefix forms lowercase the prefix and
# keep the capital on the letter after it.
^D([A-GI-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ D\L$1 -1
^D(H[A-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ D\L$1 -1
^D'([AEIOUÁÉÍÓÚÀÈÌÒÙF])([A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ d'$1\L$2 -1
^DH'([AEIOUÁÉÍÓÚÀÈÌÒÙF])([A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ dh'$1\L$2 -1
^H([A-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ H\L$1 -1
^H-([A-ZÁÉÍÓÚÀÈÌÒÙÇ])([A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ h-$1\L$2 -1
^M([A-ZÁÉÍÓÚÀÈÌÒÙÇF][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ M\L$1 -1
^M'([AEIOUÁÉÍÓÚÀÈÌÒÙF])([A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ m'$1\L$2 -1
^N([A-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ N\L$1 -1
^T([A-ZÁÉÍÓÚÀÈÌÒÙÇ][A-ZÁÉÍÓÚÀÈÌÒÙÇ'-]*)$ T\L$1 -1
# Lowercase a capital that follows a hyphen preceded by three non-underscore characters.
([^_]{3})-([A-ZÁÉÍÓÚÀÈÌÒÙÇ]) $1-\l$2 -1
# Lowercase a word-initial capital, with or without a leading apostrophe.
^'([A-ZÁÉÍÓÚÀÈÌÒÙÇ]) '\l$1 -1
^([A-ZÇÁÉÍÓÚÀÈÌÒÙÇ]) \l$1 -1
# Lowercase the capital that follows an already-lowercase d', dh', h or m'
# prefix. These are silent (-1) capitalization fixes, so the prefix itself is
# written back unchanged.
^d'([AEIOUÁÉÍÓÚÀÈÌÒÙF]) d'\l$1 -1
# dh' stays dh' (the replacement previously emitted d', altering the spelling;
# the all-caps DH' rule also produces dh').
^dh'([AEIOUÁÉÍÓÚÀÈÌÒÙF]) dh'\l$1 -1
^h([AEIOUÁÉÍÓÚÀÈÌÒÙ]) h\l$1 -1
^m'([AEIOUÁÉÍÓÚÀÈÌÒÙF]) m'\l$1 -1
# Fo-Rheynn => fo-rheynn
# (this rule lowercases the capital after the hyphen when at least two
# characters precede it)
(..)-([A-ZÁÉÍÓÚÀÈÌÒÙÇ]) $1-\l$2 -1
# hand-in-hand with code for reading in multi-xx...
# always adds a fully lowercased version to hash
# (tokens containing "_" with a capital on either side of it are lowercased whole)
^(.*[A-ZÁÉÍÓÚÀÈÌÒÙÇ].*_.+)$ \L$1 1
^(.+_.*[A-ZÁÉÍÓÚÀÈÌÒÙÇ].*)$ \L$1 1
# Hollanee -> hOllanee
^H([aeiou]) h\u$1 1
# h-Ellanyn -> hEllanyn
^h- h 1
# h'Inshey -> hInshey
^h' h 1
# g'aase -> gaase, g'irree -> girree
^g' g 1
# d'aase -> daase, d'oanluck -> doanluck
^d' d 1
# alternate nasalization of initial g
^n'gh ng 1
# alternate nasalization of initial j
^n'y y 1
# alternate lenition of initial gi
^gh([ei]) y$1 1
^Gh([ei]) Y$1 1
# sometimes in corpus prefix "n" on conditional verb includes apostrophe...
# e.g. "cha n'eaishtagh"; compare PM p.16 1.9.3
^n' n 1
# whueeyl, etc. alt lenition of queeyl (wheeyl standardly)
^whu wh 1
# pronominal forms of marish
^mâr mar 1
# ôney -> oney, etc.
ôn on 1
# non-standard diacritics
ï i 1
# the two word-specific ê rules below are disabled; the general ê -> e rule
# that follows is the active one
#êgin egin 1
#mêr mer 1
ê e 1
# imperative plural; PM p.121
(..)shiu$ $1jee 1
# emphatics aym's -> ayms (PM p.55)
's$ s 1
# yn ven s'aaley; assumes we have all "ny s'ADJ" forms in multi-gv
# this rule is half-way between "fix gv" and "convert to ga"
^s'(..) is_$1 1
^drogh-(..) drogh_$1 1
# shleayst lenites to leayst in i.pl (and PM p.19), but lheayst common enough
^lh l 1
# atchim -> atçhim
# (the rules below restore "çh" where the word was written with plain "ch")
tch tçh 1
^chag çhag 1
chamyr çhamyr 1
chanst çhanst 1
chapp çhapp 1
charv çharv 1
chee çhee 1
chell çhell 1
cheng çheng 1
chenn çhenn 1
^chesh çhesh 1
^Chesh Çhesh 1
cheu çheu 1
chill çhill 1
ching çhing 1
chiolt çhiolt 1
chirm çhirm 1
chirv çhirv 1
chym çhym 1
chyn çhyn 1
myrch myrçh 1
preach preaçh 1
brâ bragh 1
dênee denee 1
# future of verbs ending in e: soie-ee
e-ee$ ee 1
# ver-ym(s) => verym
# (only the -yms rule is active; the -ym rule is disabled)
#-ym$ ym 1
-yms$ ym 1
# hyndaa-ys => hyndaays
# (the four rules below are disabled)
#-ys$ ys 1
#-ee$ ee 1
#^mee- neu 1
#^neu- neu 1
# lenition of bw-, mw- is "v" in i.pl; alternate "w" possible too; PM p.19
^w v 1
# enmyssyn -> enmysyn (former in dictionaries, latter generated by i.pl)
ssyn$ syn 1
# drop all hyphens as last resort
(.)- $1 1
#######################################################
# Then, an attempt to find a word in the target language...
# (presumably Irish, "ga" -- confirm)
# Largely guesswork! (approximate translation of the original Irish note,
# "turas góidrisc den chuid is mó")
#######################################################
# numeral ending in 0 + "yn" -> numeral + "í", e.g. 1960yn -> 1960í
^([0-9]+0)yn$ $1í 1
# numeral + "oo" -> numeral + "ú", e.g. 3oo -> 3ú
^([1-9][0-9]*)oo$ $1ú 1
# word-final -agh -> -ach, -aghd -> -acht
agh$ ach 1
aghd$ acht 1