Skip to content

Commit

Permalink
[xlm tok] config dict: fix str into int to match definition (huggingf…
Browse files Browse the repository at this point in the history
  • Loading branch information
stas00 authored and Zigur committed Oct 26, 2020
1 parent 2bdc454 commit 69b4e4d
Showing 1 changed file with 152 additions and 152 deletions.
304 changes: 152 additions & 152 deletions src/transformers/tokenization_xlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,37 +79,37 @@
"xlm-mlm-en-2048": {"do_lowercase_and_remove_accent": True},
"xlm-mlm-ende-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {"0": "de", "1": "en"},
"id2lang": {0: "de", 1: "en"},
"lang2id": {"de": 0, "en": 1},
},
"xlm-mlm-enfr-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {"0": "en", "1": "fr"},
"id2lang": {0: "en", 1: "fr"},
"lang2id": {"en": 0, "fr": 1},
},
"xlm-mlm-enro-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {"0": "en", "1": "ro"},
"id2lang": {0: "en", 1: "ro"},
"lang2id": {"en": 0, "ro": 1},
},
"xlm-mlm-tlm-xnli15-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {
"0": "ar",
"1": "bg",
"2": "de",
"3": "el",
"4": "en",
"5": "es",
"6": "fr",
"7": "hi",
"8": "ru",
"9": "sw",
"10": "th",
"11": "tr",
"12": "ur",
"13": "vi",
"14": "zh",
0: "ar",
1: "bg",
2: "de",
3: "el",
4: "en",
5: "es",
6: "fr",
7: "hi",
8: "ru",
9: "sw",
10: "th",
11: "tr",
12: "ur",
13: "vi",
14: "zh",
},
"lang2id": {
"ar": 0,
Expand All @@ -132,21 +132,21 @@
"xlm-mlm-xnli15-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {
"0": "ar",
"1": "bg",
"2": "de",
"3": "el",
"4": "en",
"5": "es",
"6": "fr",
"7": "hi",
"8": "ru",
"9": "sw",
"10": "th",
"11": "tr",
"12": "ur",
"13": "vi",
"14": "zh",
0: "ar",
1: "bg",
2: "de",
3: "el",
4: "en",
5: "es",
6: "fr",
7: "hi",
8: "ru",
9: "sw",
10: "th",
11: "tr",
12: "ur",
13: "vi",
14: "zh",
},
"lang2id": {
"ar": 0,
Expand All @@ -168,34 +168,34 @@
},
"xlm-clm-enfr-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {"0": "en", "1": "fr"},
"id2lang": {0: "en", 1: "fr"},
"lang2id": {"en": 0, "fr": 1},
},
"xlm-clm-ende-1024": {
"do_lowercase_and_remove_accent": True,
"id2lang": {"0": "de", "1": "en"},
"id2lang": {0: "de", 1: "en"},
"lang2id": {"de": 0, "en": 1},
},
"xlm-mlm-17-1280": {
"do_lowercase_and_remove_accent": False,
"id2lang": {
"0": "ar",
"1": "de",
"2": "en",
"3": "es",
"4": "fr",
"5": "hi",
"6": "it",
"7": "ja",
"8": "ko",
"9": "nl",
"10": "pl",
"11": "pt",
"12": "ru",
"13": "sv",
"14": "tr",
"15": "vi",
"16": "zh",
0: "ar",
1: "de",
2: "en",
3: "es",
4: "fr",
5: "hi",
6: "it",
7: "ja",
8: "ko",
9: "nl",
10: "pl",
11: "pt",
12: "ru",
13: "sv",
14: "tr",
15: "vi",
16: "zh",
},
"lang2id": {
"ar": 0,
Expand All @@ -220,106 +220,106 @@
"xlm-mlm-100-1280": {
"do_lowercase_and_remove_accent": False,
"id2lang": {
"0": "af",
"1": "als",
"2": "am",
"3": "an",
"4": "ang",
"5": "ar",
"6": "arz",
"7": "ast",
"8": "az",
"9": "bar",
"10": "be",
"11": "bg",
"12": "bn",
"13": "br",
"14": "bs",
"15": "ca",
"16": "ceb",
"17": "ckb",
"18": "cs",
"19": "cy",
"20": "da",
"21": "de",
"22": "el",
"23": "en",
"24": "eo",
"25": "es",
"26": "et",
"27": "eu",
"28": "fa",
"29": "fi",
"30": "fr",
"31": "fy",
"32": "ga",
"33": "gan",
"34": "gl",
"35": "gu",
"36": "he",
"37": "hi",
"38": "hr",
"39": "hu",
"40": "hy",
"41": "ia",
"42": "id",
"43": "is",
"44": "it",
"45": "ja",
"46": "jv",
"47": "ka",
"48": "kk",
"49": "kn",
"50": "ko",
"51": "ku",
"52": "la",
"53": "lb",
"54": "lt",
"55": "lv",
"56": "mk",
"57": "ml",
"58": "mn",
"59": "mr",
"60": "ms",
"61": "my",
"62": "nds",
"63": "ne",
"64": "nl",
"65": "nn",
"66": "no",
"67": "oc",
"68": "pl",
"69": "pt",
"70": "ro",
"71": "ru",
"72": "scn",
"73": "sco",
"74": "sh",
"75": "si",
"76": "simple",
"77": "sk",
"78": "sl",
"79": "sq",
"80": "sr",
"81": "sv",
"82": "sw",
"83": "ta",
"84": "te",
"85": "th",
"86": "tl",
"87": "tr",
"88": "tt",
"89": "uk",
"90": "ur",
"91": "uz",
"92": "vi",
"93": "war",
"94": "wuu",
"95": "yi",
"96": "zh",
"97": "zh_classical",
"98": "zh_min_nan",
"99": "zh_yue",
0: "af",
1: "als",
2: "am",
3: "an",
4: "ang",
5: "ar",
6: "arz",
7: "ast",
8: "az",
9: "bar",
10: "be",
11: "bg",
12: "bn",
13: "br",
14: "bs",
15: "ca",
16: "ceb",
17: "ckb",
18: "cs",
19: "cy",
20: "da",
21: "de",
22: "el",
23: "en",
24: "eo",
25: "es",
26: "et",
27: "eu",
28: "fa",
29: "fi",
30: "fr",
31: "fy",
32: "ga",
33: "gan",
34: "gl",
35: "gu",
36: "he",
37: "hi",
38: "hr",
39: "hu",
40: "hy",
41: "ia",
42: "id",
43: "is",
44: "it",
45: "ja",
46: "jv",
47: "ka",
48: "kk",
49: "kn",
50: "ko",
51: "ku",
52: "la",
53: "lb",
54: "lt",
55: "lv",
56: "mk",
57: "ml",
58: "mn",
59: "mr",
60: "ms",
61: "my",
62: "nds",
63: "ne",
64: "nl",
65: "nn",
66: "no",
67: "oc",
68: "pl",
69: "pt",
70: "ro",
71: "ru",
72: "scn",
73: "sco",
74: "sh",
75: "si",
76: "simple",
77: "sk",
78: "sl",
79: "sq",
80: "sr",
81: "sv",
82: "sw",
83: "ta",
84: "te",
85: "th",
86: "tl",
87: "tr",
88: "tt",
89: "uk",
90: "ur",
91: "uz",
92: "vi",
93: "war",
94: "wuu",
95: "yi",
96: "zh",
97: "zh_classical",
98: "zh_min_nan",
99: "zh_yue",
},
"lang2id": {
"af": 0,
Expand Down

0 comments on commit 69b4e4d

Please sign in to comment.