Skip to content

Commit

Permalink
Merge pull request #5173 from wikimedia/T384073-dupes
Browse files Browse the repository at this point in the history
Remove duplicate languages
  • Loading branch information
mazevedofs authored Jan 28, 2025
2 parents 50d644c + 5b22cbc commit fc33413
Show file tree
Hide file tree
Showing 144 changed files with 879 additions and 710 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,25 @@ class WikipediaLanguageCommandLineUtilityAPI {
return Wikipedia(languageCode: code, languageName: name, localName: localname, altISOCode: "nb")
}

// If the first entry of the "site" array has a wikipedia.org URL whose subdomain does NOT equal the language code, skip this language. Otherwise it might show up as a dupe in the languages list.
// TODO: This is a temporary workaround to remove duplicated languages. Eventually a proper migration of user data might be needed to switch from the old language code to the new one.
if let sites = result["site"] as? [[String : Any]] {
let site = sites.first

if let siteURLString = site?["url"] as? String,
let components = URLComponents(string: siteURLString),
let host = components.host,
let hostLangCode = host.components(separatedBy: ".").first {

if siteURLString.hasSuffix("wikipedia.org") {
if code != hostLangCode && code != "yue" { // Cantonese has already slipped in here twice, so leaving it in as a dupe until we can clean up user databases.
return nil
}
}

}
}

return Wikipedia(languageCode: code, languageName: name, localName: localname, altISOCode: nil)
}
// Add testwiki and test2wiki, they are not returned by the site matrix
Expand Down
35 changes: 5 additions & 30 deletions Wikipedia/Code/wikipedia-languages.json
Original file line number Diff line number Diff line change
Expand Up @@ -569,11 +569,6 @@
"languageName" : "Ghanaian Pidgin",
"localName" : "Ghanaian Pidgin"
},
{
"languageCode" : "gsw",
"languageName" : "Alemannisch",
"localName" : "Alemannic"
},
{
"languageCode" : "gu",
"languageName" : "ગુજરાતી",
Expand Down Expand Up @@ -969,11 +964,6 @@
"languageName" : "latviešu",
"localName" : "Latvian"
},
{
"languageCode" : "lzh",
"languageName" : "文言",
"localName" : "Literary Chinese"
},
{
"languageCode" : "mad",
"languageName" : "Madhurâ",
Expand Down Expand Up @@ -1109,11 +1099,6 @@
"languageName" : "Nāhuatl",
"localName" : "Nahuatl"
},
{
"languageCode" : "nan",
"languageName" : "閩南語 \/ Bân-lâm-gú",
"localName" : "Minnan"
},
{
"languageCode" : "nap",
"languageName" : "Napulitano",
Expand Down Expand Up @@ -1360,11 +1345,6 @@
"languageName" : "русиньскый",
"localName" : "Rusyn"
},
{
"languageCode" : "rup",
"languageName" : "armãneashti",
"localName" : "Aromanian"
},
{
"languageCode" : "rw",
"languageName" : "Ikinyarwanda",
Expand Down Expand Up @@ -1415,11 +1395,6 @@
"languageName" : "Sängö",
"localName" : "Sango"
},
{
"languageCode" : "sgs",
"languageName" : "žemaitėška",
"localName" : "Samogitian"
},
{
"languageCode" : "sh",
"languageName" : "srpskohrvatski \/ српскохрватски",
Expand Down Expand Up @@ -1595,6 +1570,11 @@
"languageName" : "ትግርኛ",
"localName" : "Tigrinya"
},
{
"languageCode" : "tig",
"languageName" : "ትግሬ",
"localName" : "Tigre"
},
{
"languageCode" : "tk",
"languageName" : "Türkmençe",
Expand Down Expand Up @@ -1720,11 +1700,6 @@
"languageName" : "Volapük",
"localName" : "Volapük"
},
{
"languageCode" : "vro",
"languageName" : "võro",
"localName" : "Võro"
},
{
"languageCode" : "wa",
"languageName" : "walon",
Expand Down
124 changes: 124 additions & 0 deletions Wikipedia/Code/wikipedia-magicwords/tig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
[
{
"aliases" : [
"alt=$1"
],
"name" : "img_alt"
},
{
"aliases" : [
"baseline"
],
"name" : "img_baseline"
},
{
"aliases" : [
"border"
],
"name" : "img_border"
},
{
"aliases" : [
"bottom"
],
"name" : "img_bottom"
},
{
"aliases" : [
"center",
"centre"
],
"name" : "img_center"
},
{
"aliases" : [
"frame",
"framed",
"enframed"
],
"name" : "img_framed"
},
{
"aliases" : [
"frameless"
],
"name" : "img_frameless"
},
{
"aliases" : [
"left"
],
"name" : "img_left"
},
{
"aliases" : [
"middle"
],
"name" : "img_middle"
},
{
"aliases" : [
"none"
],
"name" : "img_none"
},
{
"aliases" : [
"right"
],
"name" : "img_right"
},
{
"aliases" : [
"sub"
],
"name" : "img_sub"
},
{
"aliases" : [
"super",
"sup"
],
"name" : "img_super"
},
{
"aliases" : [
"text-bottom"
],
"name" : "img_text_bottom"
},
{
"aliases" : [
"text-top"
],
"name" : "img_text_top"
},
{
"aliases" : [
"thumb",
"thumbnail"
],
"name" : "img_thumbnail"
},
{
"aliases" : [
"top"
],
"name" : "img_top"
},
{
"aliases" : [
"upright",
"upright=$1",
"upright $1"
],
"name" : "img_upright"
},
{
"aliases" : [
"ፋይል",
"Image"
],
"name" : "file_namespace"
}
]
6 changes: 3 additions & 3 deletions Wikipedia/Code/wikipedia-namespaces/alt.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@
"ОБСУЖДЕНИЕ ИЗОБРАЖЕНИЯ" : 7,
"ТУРУЖААЧЫ" : 2,
"ТУРУЖААЧЫНЫ ШӰӰЖЕРИ" : 3,
"ӰЛЕКЕР" : 10,
"ӰЛЕКЕРДИ ШӰӰЖЕРИ" : 11,
"ФАЙЛ" : 6,
"ФАЙЛДЫ ШӰӰЖЕРИ" : 7,
"ШӰӰЖӰ" : 1,
"ӰЛЕКЕР" : 10,
"ӰЛЕКЕРДИ ШӰӰЖЕРИ" : 11
"ШӰӰЖӰ" : 1
}
}
2 changes: 1 addition & 1 deletion Wikipedia/Code/wikipedia-namespaces/ami.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
"MODULE" : 828,
"MODULE TALK" : 829,
"NIPADAMA'" : 12,
"NIPADAMA' A MASASOWAL" : 13,
"NIPADAMA’" : 12,
"NIPADAMA' A MASASOWAL" : 13,
"PROJECT" : 4,
"PROJECT TALK" : 5,
"SPECIAL" : -1,
Expand Down
2 changes: 1 addition & 1 deletion Wikipedia/Code/wikipedia-namespaces/an.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
"namespace" : {
"" : 0,
"ADUYA" : 12,
"CATEGORÍA" : 14,
"CATEGORY" : 14,
"CATEGORY TALK" : 15,
"CATEGORÍA" : 14,
"DESCUSIÓN" : 1,
"DESCUSIÓN ADUYA" : 13,
"DESCUSIÓN CATEGORÍA" : 15,
Expand Down
22 changes: 11 additions & 11 deletions Wikipedia/Code/wikipedia-namespaces/ang.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"BILIÞMOTUNG" : 7,
"BILIÞMŌTUNG" : 7,
"BRUCEND" : 2,
"BRUCENDMOTUNG" : 3,
"BRŪCEND" : 2,
"BRUCENDMOTUNG" : 3,
"BRŪCENDMŌTUNG" : 3,
"BYSEN" : 10,
"BYSENGESPREC" : 11,
Expand Down Expand Up @@ -49,33 +49,33 @@
"USER" : 2,
"USER TALK" : 3,
"WICIPAEDIA" : 4,
"WICIPAEDIAMOTUNG" : 5,
"WICIPÆDIA" : 4,
"WICIPÆDIAMOTUNG" : 5,
"WICIPǢDIA" : 4,
"WICIPAEDIAMOTUNG" : 5,
"WICIPÆDIAMOTUNG" : 5,
"WICIPǢDIAMŌTUNG" : 5,
"WIKIPAEDIA" : 4,
"WIKIPAEDIAMOTUNG" : 5,
"WIKIPEDIA" : 4,
"WIKIPEDIA TALK" : 5,
"WIKIPÆDIA" : 4,
"WIKIPÆDIAMOTUNG" : 5,
"WIKIPǢDIA" : 4,
"WIKIPAEDIAMOTUNG" : 5,
"WIKIPÆDIAMOTUNG" : 5,
"WIKIPǢDIAMŌTUNG" : 5,
"WIKIPEDIA" : 4,
"WIKIPEDIA TALK" : 5,
"WP" : 4,
"YMELE" : 6,
"YMELMŌTUNG" : 7,
"ǷICIPAEDIA" : 4,
"ǷICIPAEDIAMOTUNG" : 5,
"ǷICIPÆDIA" : 4,
"ǷICIPÆDIAMOTUNG" : 5,
"ǷICIPǢDIA" : 4,
"ǷICIPAEDIAMOTUNG" : 5,
"ǷICIPÆDIAMOTUNG" : 5,
"ǷICIPǢDIAMŌTUNG" : 5,
"ǷIKIPAEDIA" : 4,
"ǷIKIPAEDIAMOTUNG" : 5,
"ǷIKIPÆDIA" : 4,
"ǷIKIPÆDIAMOTUNG" : 5,
"ǷIKIPǢDIA" : 4,
"ǷIKIPAEDIAMOTUNG" : 5,
"ǷIKIPÆDIAMOTUNG" : 5,
"ǷIKIPǢDIAMŌTUNG" : 5
}
}
12 changes: 6 additions & 6 deletions Wikipedia/Code/wikipedia-namespaces/ann.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
"MODULE" : 828,
"MODULE TALK" : 829,
"NTAP-UBỌK" : 12,
"ỌGBỌN̄" : 14,
"ÒSIKWAAN̄" : 2,
"PROJECT" : 4,
"PROJECT TALK" : 5,
"SPECIAL" : -1,
Expand All @@ -32,17 +34,15 @@
"UKPATU FAILU" : 7,
"UKPATU MEDIAWIKI" : 9,
"UKPATU NTAP-UBỌK" : 13,
"UKPATU ỌGBỌN̄" : 15,
"UKPATU ÒSIKWAAN̄" : 3,
"UKPATU TEMPULET" : 11,
"UKPATU WÌKÌPEDIA" : 5,
"UKPATU ÒSIKWAAN̄" : 3,
"UKPATU ỌGBỌN̄" : 15,
"USER" : 2,
"USER TALK" : 3,
"WIKIPEDIA" : 4,
"WIKIPEDIA TALK" : 5,
"WP" : 4,
"WÌKÌPEDIA" : 4,
"ÒSIKWAAN̄" : 2,
"ỌGBỌN̄" : 14
"WIKIPEDIA TALK" : 5,
"WP" : 4
}
}
14 changes: 7 additions & 7 deletions Wikipedia/Code/wikipedia-namespaces/as.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@
"মিডিয়াৱিকি আলোচনা" : 9,
"মেডিয়াৱিকি" : 8,
"মেডিয়াৱিকি বাৰ্তা" : 9,
"ৱিকিচ'ৰা" : 100,
"ৱিকিচ'ৰা আলোচনা" : 101,
"ৱিকিপিডিয়া" : 4,
"ৱিকিপিডিয়া वार्ता" : 5,
"ৱিকিপিডিয়া বার্তা" : 5,
"ৱিকিপিডিয়া বাৰ্তা" : 5,
"শ্রেণী" : 14,
"শ্রেণী বার্তা" : 15,
"শ্ৰেণী" : 14,
Expand All @@ -69,12 +75,6 @@
"সহায় বাৰ্তা" : 13,
"সাঁচ" : 10,
"সাঁচ বার্তা" : 11,
"সাঁচ বাৰ্তা" : 11,
"ৱিকিচ'ৰা" : 100,
"ৱিকিচ'ৰা আলোচনা" : 101,
"ৱিকিপিডিয়া" : 4,
"ৱিকিপিডিয়া वार्ता" : 5,
"ৱিকিপিডিয়া বার্তা" : 5,
"ৱিকিপিডিয়া বাৰ্তা" : 5
"সাঁচ বাৰ্তা" : 11
}
}
Loading

0 comments on commit fc33413

Please sign in to comment.