From 50507b43e764ac44476f7c373c17a54d462079df Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Tue, 17 Mar 2020 15:50:19 +0000 Subject: [PATCH 01/12] Fix Abkhaz language --- wikipron/languagecodes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 45195976..c53e5a1c 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -257,5 +257,7 @@ # Moroccan Arabic. ISO 639-3 only. "ary": "Moroccan Arabic", # Mandarin Chinese. ISO 639-3 only. - "cmn": "Chinese" + "cmn": "Chinese", + # Abkhaz. Would be Abkhazian in ISO 639. + "abk": "Abkhaz" } From 4d6933e5cebe332ce910e5dc0ca7b08957819217 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Tue, 17 Mar 2020 16:37:22 +0000 Subject: [PATCH 02/12] Fix for Avar language --- wikipron/languagecodes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index c53e5a1c..151ea564 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -259,5 +259,7 @@ # Mandarin Chinese. ISO 639-3 only. "cmn": "Chinese", # Abkhaz. Would be Abkhazian in ISO 639. - "abk": "Abkhaz" + "abk": "Abkhaz", + # Avar. Would be Avaric in ISO 639. + "ava": "Avar" } From 545faa0a0ee4a7b17d370d5d78b428cf823e11e0 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Tue, 17 Mar 2020 18:49:16 +0000 Subject: [PATCH 03/12] Fix for Buryat language --- wikipron/languagecodes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 151ea564..de1f24e3 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -261,5 +261,7 @@ # Abkhaz. Would be Abkhazian in ISO 639. "abk": "Abkhaz", # Avar. Would be Avaric in ISO 639. - "ava": "Avar" + "ava": "Avar", + # Buryat. Would be Buriat in ISO 639. 
+ "bua": "Buryat", } From e6392110ed14c2e109a165381e9fc9fa60e2de92 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Wed, 18 Mar 2020 17:25:48 +0000 Subject: [PATCH 04/12] Fix for Chukchi language --- wikipron/languagecodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index de1f24e3..7f4bbd0a 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -264,4 +264,6 @@ "ava": "Avar", # Buryat. Would be Buriat in ISO 639. "bua": "Buryat", + # Chukchi. Would be Chukot in ISO 639. + "ckt": "Chukchi", } From 7f9b565ff13ca0eae45527dcbb2b50b3e70d693e Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Fri, 20 Mar 2020 13:56:34 +0000 Subject: [PATCH 05/12] Burushaski language code --- wikipron/languagecodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 7f4bbd0a..432c32b5 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -266,4 +266,6 @@ "bua": "Buryat", # Chukchi. Would be Chukot in ISO 639. "ckt": "Chukchi", + # Burushaski. ISO 639-3 only. + "bsk": "Burushaski", } From bf252336df0bc2bfad9f08b1d14b62f03f72d60b Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Fri, 20 Mar 2020 14:08:05 +0000 Subject: [PATCH 06/12] Galician language code --- wikipron/languagecodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 432c32b5..84d099ed 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -268,4 +268,6 @@ "ckt": "Chukchi", # Burushaski. ISO 639-3 only. "bsk": "Burushaski", + # Galician. Not in iso639. 
+ "glc": "Galician", } From 5127d3d28a416f6e9ee17df309dfc754a676b5b4 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Fri, 20 Mar 2020 14:28:45 +0000 Subject: [PATCH 07/12] Increasing number of languages --- wikipron/languagecodes.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 84d099ed..69cfcf3e 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -268,6 +268,12 @@ "ckt": "Chukchi", # Burushaski. ISO 639-3 only. "bsk": "Burushaski", - # Galician. Not in iso639. + # Galician. Not in iso639 lib. "glc": "Galician", + # Evenki. Not in iso639 lib. + "evn": "Evenki", + # Southern Yukaghir. Not in iso639 lib. + "yux": "Southern Yukaghir", + # Tundra Nenets. Not in iso639 lib. + "yrk": "Tundra Nenets", } From 13e00b754a19d5592f7ff447503eefa8ce1779db Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Fri, 20 Mar 2020 15:10:46 +0000 Subject: [PATCH 08/12] Added missing language codes --- wikipron/languagecodes.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 69cfcf3e..ac17877e 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -276,4 +276,26 @@ "yux": "Southern Yukaghir", # Tundra Nenets. Not in iso639 lib. "yrk": "Tundra Nenets", + # Estonian. Not in iso639 lib. + "ekk": "Estonian", + # Livvi. Not in iso639 lib. + "olo": "Livvi", + # Kildin Sami. Not in iso639 lib. + "sjd": "Kildin Sami", + # Northern Yukaghir. Not in iso639 lib. + "ykg": "Northern Yukaghir", + # Nanai. Not in iso639 lib. + "gld": "Nanai", + # Persian. Not in iso639 lib. + "pes": "Persian", + # Greenlandic. Would be Kalaallisut in ISO 639. + "kal": "Greenlandic", + # Khanty. Not in iso639 lib. + "kca": "Khanty", + # Ket. Not in iso639 lib. + "ket": "Ket", + # Komi-Permyak. Not in iso639 lib. + "koi": "Komi-Permyak", + # Komi-Zyrian. Not in iso639 lib. 
+ "kpv": "Komi-Zyrian", } From 699e1c58c158292b9ef25eb9fec88a400244240f Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Fri, 20 Mar 2020 19:04:25 +0000 Subject: [PATCH 09/12] Added new language codes --- wikipron/languagecodes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index ac17877e..bbed08fc 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -298,4 +298,16 @@ "koi": "Komi-Permyak", # Komi-Zyrian. Not in iso639 lib. "kpv": "Komi-Zyrian", + # Lak. Not in iso639 lib. + "lbe": "Lak", + # Lezgi. Would be Lezghian in ISO 639. + "lez": "Lezgi", + # Eastern Mari. Not in iso639 lib. + "mhr": "Eastern Mari", + # Mansi. Not in iso639 lib. + "mns": "Mansi", + # Nganasan. Not in iso639 lib. + "nio": "Nganasan", + # Nivkh. Not in iso639 lib. + "niv": "Nivkh", } From 3bc6ecb869fe17dbb87b85558d9e4ec2d25f5ad5 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Sat, 21 Mar 2020 17:42:25 +0000 Subject: [PATCH 10/12] Removed wrong language code and fixed lmy --- wikipron/languagecodes.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index bbed08fc..72e0ea02 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -253,7 +253,7 @@ # Bouyei. ISO 639-3 only. "pcc": "Bouyei", # Lamboya. ISO 639-3 only. - "lmy": "Lamboya", + "lmy": "Laboya", # Moroccan Arabic. ISO 639-3 only. "ary": "Moroccan Arabic", # Mandarin Chinese. ISO 639-3 only. @@ -268,8 +268,6 @@ "ckt": "Chukchi", # Burushaski. ISO 639-3 only. "bsk": "Burushaski", - # Galician. Not in iso639 lib. - "glc": "Galician", # Evenki. Not in iso639 lib. "evn": "Evenki", # Southern Yukaghir. Not in iso639 lib. 
From e34294ecc4147d3cb90dffafd9259a2bdf086b5f Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Sat, 21 Mar 2020 17:45:41 +0000 Subject: [PATCH 11/12] Added lower case version of language name to languagecodes --- wikipron/languagecodes.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index 72e0ea02..d6d4ca79 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -252,60 +252,89 @@ "tetelcingo nahuatl": "Tetelcingo Nahuatl", # Bouyei. ISO 639-3 only. "pcc": "Bouyei", + "bouyei": "Bouyei", # Lamboya. ISO 639-3 only. "lmy": "Laboya", + "laboya": "Laboya", # Moroccan Arabic. ISO 639-3 only. "ary": "Moroccan Arabic", + "moroccan arabic": "Moroccan Arabic", # Mandarin Chinese. ISO 639-3 only. "cmn": "Chinese", + "chinese": "Chinese", # Abkhaz. Would be Abkhazian in ISO 639. "abk": "Abkhaz", + "abkhaz": "Abkhaz", # Avar. Would be Avaric in ISO 639. "ava": "Avar", + "avar": "Avar", # Buryat. Would be Buriat in ISO 639. "bua": "Buryat", + "buryat": "Buryat", # Chukchi. Would be Chukot in ISO 639. "ckt": "Chukchi", + "chukchi": "Chukchi", # Burushaski. ISO 639-3 only. "bsk": "Burushaski", + "burushaski": "Burushaski", # Evenki. Not in iso639 lib. "evn": "Evenki", + "evenki": "Evenki", # Southern Yukaghir. Not in iso639 lib. "yux": "Southern Yukaghir", + "southern yukaghir": "Southern Yukaghir", # Tundra Nenets. Not in iso639 lib. "yrk": "Tundra Nenets", + "tundra nenets": "Tundra Nenets", # Estonian. Not in iso639 lib. "ekk": "Estonian", + "estonian": "Estonian", # Livvi. Not in iso639 lib. "olo": "Livvi", + "livvi": "Livvi", # Kildin Sami. Not in iso639 lib. "sjd": "Kildin Sami", + "kildin sami": "Kildin Sami", # Northern Yukaghir. Not in iso639 lib. "ykg": "Northern Yukaghir", + "northern yukaghir": "Northern Yukaghir", # Nanai. Not in iso639 lib. "gld": "Nanai", + "nanai": "Nanai", # Persian. Not in iso639 lib. 
"pes": "Persian", + "persian": "Persian", # Greenlandic. Would be Kalaallisut in ISO 639. "kal": "Greenlandic", + "greenlandic": "Greenlandic", # Khanty. Not in iso639 lib. "kca": "Khanty", + "khanty": "Khanty", # Ket. Not in iso639 lib. "ket": "Ket", + "ket": "Ket", # Komi-Permyak. Not in iso639 lib. "koi": "Komi-Permyak", + "komi-permyak": "Komi-Permyak", # Komi-Zyrian. Not in iso639 lib. "kpv": "Komi-Zyrian", + "komi-zyrian": "Komi-Zyrian", # Lak. Not in iso639 lib. "lbe": "Lak", + "lak": "Lak", # Lezgi. Would be Lezghian in ISO 639. "lez": "Lezgi", + "lezgi": "Lezgi", # Eastern Mari. Not in iso639 lib. "mhr": "Eastern Mari", + "eastern mari": "Eastern Mari", # Mansi. Not in iso639 lib. "mns": "Mansi", + "mansi": "Mansi", # Nganasan. Not in iso639 lib. "nio": "Nganasan", + "nganasan": "Nganasan", # Nivkh. Not in iso639 lib. "niv": "Nivkh", + "nivkh": "Nivkh", } From db1430ca89202ae27afc1fee0f90363d7855f024 Mon Sep 17 00:00:00 2001 From: Tiago Pimentel Date: Sat, 21 Mar 2020 21:19:38 +0000 Subject: [PATCH 12/12] Removing duplicated line for ket language code --- wikipron/languagecodes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wikipron/languagecodes.py b/wikipron/languagecodes.py index d6d4ca79..d03839cd 100644 --- a/wikipron/languagecodes.py +++ b/wikipron/languagecodes.py @@ -312,7 +312,6 @@ "khanty": "Khanty", # Ket. Not in iso639 lib. "ket": "Ket", - "ket": "Ket", # Komi-Permyak. Not in iso639 lib. "koi": "Komi-Permyak", "komi-permyak": "Komi-Permyak",