Skip to content

Commit

Permalink
fix multiple Slavic language issues, minor cosmetic change (#180)
Browse files Browse the repository at this point in the history
* improve Russian form handling, write tests

Fixed Russian forms being improperly marked as lemmas. Wrote tests.

* add missing common Russian case (prepositional), write tests

* filter `dated` inflection tag, add test

* improve noun gender coloring

Changed masculine, feminine, and neuter tag coloring to something more visually pleasing.

* add missing noun gender and verb aspect

Extracts noun gender and verb aspect from the `expansion` field of each headword template (`head_templates`). Should work well for Slavic languages, and even Latin.
  • Loading branch information
seth-js authored Dec 20, 2024
1 parent 6e1850f commit 0814121
Show file tree
Hide file tree
Showing 15 changed files with 2,314 additions and 21 deletions.
32 changes: 29 additions & 3 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function isInflectionGloss(glosses, formOf) {
if(!Array.isArray(formOf)) return false;
for (const {word: lemma} of formOf) {
if(!lemma) continue;
if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}$`).test(gloss))) return true;
if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}($| \(.+?\)$)`).test(gloss))) return true;
}

case 'fr':
Expand Down Expand Up @@ -93,7 +93,8 @@ const blacklistedTags = [
'obsolete',
'archaic',
'used-in-the-form',
'romanization'
'romanization',
'dated'
];

const identityTags = [
Expand Down Expand Up @@ -131,7 +132,7 @@ function handleLine(parsedLine) {

processForms(forms, word, pos);

const {senses} = parsedLine;
const {senses, head_templates} = parsedLine;
if (!senses) return;

/** @type {IpaInfo[]} */
Expand Down Expand Up @@ -164,6 +165,31 @@ function handleLine(parsedLine) {
tags.push(...sense.raw_tags);
}

if (head_templates && targetIso === 'en') {
const tagMatch = [
['pf', 'perfective'],
['impf', 'imperfective'],
['m', 'masculine'],
['f', 'feminine'],
['n', 'neuter'],
['inan', 'inanimate'],
['anim', 'animate'],
];

for (const entry of head_templates) {
if (entry.expansion) {
for (const [match, tag] of tagMatch) {
if (
entry.expansion.replace(/\(.+?\)/g, '').split(' ').includes(match) &&
!tags.includes(tag)
) {
tags.push(tag);
}
}
}
}
}

return {...sense, glossesArray, tags};
}));

Expand Down
3 changes: 2 additions & 1 deletion data/language/tag_order.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"accusative",
"vocative",
"locative",
"instrumental"
"instrumental",
"prepositional"
],
"persons": [
"first-person",
Expand Down
6 changes: 3 additions & 3 deletions data/language/target-language-tags/en/tag_styles.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"masc": ".tag[data-details='masculine'] .tag-label{background-color: blue;}",
"fem": ".tag[data-details='feminine'] .tag-label{background-color: red;}",
"neut": ".tag[data-details='neuter'] .tag-label{background-color: green;}"
"masc": ".tag[data-details='masculine'] .tag-label{background-color: #4d82e8;}",
"fem": ".tag[data-details='feminine'] .tag-label{background-color: #ca4d93;}",
"neut": ".tag[data-details='neuter'] .tag-label{background-color: #40ac65;}"
}
21 changes: 14 additions & 7 deletions data/test/dict/la/en/tag_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
"declension-1",
0
],
[
"fem",
"",
-1,
"feminine",
1
],
[
"n",
"partOfSpeech",
Expand Down Expand Up @@ -48,6 +55,13 @@
"declension-2",
0
],
[
"neut",
"",
-1,
"neuter",
1
],
[
"not-comp",
"",
Expand Down Expand Up @@ -76,13 +90,6 @@
"declension-4",
0
],
[
"fem",
"",
-1,
"feminine",
1
],
[
"irreg",
"",
Expand Down
4 changes: 2 additions & 2 deletions data/test/dict/la/en/term_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
[
"fama",
"fāma",
"decl-1 n",
"decl-1 fem n",
"n",
0,
[
Expand Down Expand Up @@ -324,7 +324,7 @@
[
"lilium",
"līlium",
"decl-2 n",
"decl-2 neut n",
"n",
0,
[
Expand Down
79 changes: 79 additions & 0 deletions data/test/dict/ru/en/tag_bank_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
[
[
"masc",
"",
-1,
"masculine",
1
],
[
"inanim",
"",
0,
"inanimate",
0
],
[
"n",
"partOfSpeech",
-1,
"noun",
1
],
[
"fig",
"",
0,
"figuratively",
0
],
[
"sl",
"",
0,
"slang",
0
],
[
"pf",
"",
0,
"perfective",
0
],
[
"v",
"partOfSpeech",
-1,
"verb",
1
],
[
"col",
"",
0,
"colloquial",
0
],
[
"impers",
"",
0,
"impersonal",
0
],
[
"reltnl",
"",
0,
"relational",
0
],
[
"adj",
"partOfSpeech",
-1,
"adjective",
1
]
]
Loading

0 comments on commit 0814121

Please sign in to comment.