-
Notifications
You must be signed in to change notification settings - Fork 2
/
emacsenc.c
130 lines (122 loc) · 3.68 KB
/
emacsenc.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/*
* emacsenc.c - translate our internal character set codes to and from
* GNU Emacs coding system symbols. Derived from running M-x
* list-coding-systems in Emacs 21.3.
*
*/
#include <ctype.h>
#include "charset.h"
#include "internal.h"
/*
 * Lookup table pairing GNU Emacs coding system names with internal
 * charset ids. Both translation functions in this file scan it
 * linearly from the first entry, so the order of the entries is
 * significant (see the note inside the initialiser).
 */
static const struct {
const char *name; /* Emacs coding system name */
int charset; /* corresponding internal charset id (a CS_* constant) */
} emacsencs[] = {
/*
 * Where multiple encoding names map to the same encoding id
 * (such as iso-latin-1 and iso-8859-1), the first is considered
 * canonical and will be returned when translating the id to a
 * string. The remaining names for that id are accepted as aliases
 * when translating a string to an id.
 */
{ "us-ascii", CS_ASCII },
{ "iso-latin-9", CS_ISO8859_15 },
{ "iso-8859-15", CS_ISO8859_15 },
{ "latin-9", CS_ISO8859_15 },
{ "latin-0", CS_ISO8859_15 },
{ "iso-latin-1", CS_ISO8859_1 },
{ "iso-8859-1", CS_ISO8859_1 },
{ "latin-1", CS_ISO8859_1 },
{ "iso-latin-2", CS_ISO8859_2 },
{ "iso-8859-2", CS_ISO8859_2 },
{ "latin-2", CS_ISO8859_2 },
{ "iso-latin-3", CS_ISO8859_3 },
{ "iso-8859-3", CS_ISO8859_3 },
{ "latin-3", CS_ISO8859_3 },
{ "iso-latin-4", CS_ISO8859_4 },
{ "iso-8859-4", CS_ISO8859_4 },
{ "latin-4", CS_ISO8859_4 },
{ "cyrillic-iso-8bit", CS_ISO8859_5 },
{ "iso-8859-5", CS_ISO8859_5 },
{ "greek-iso-8bit", CS_ISO8859_7 },
{ "iso-8859-7", CS_ISO8859_7 },
{ "hebrew-iso-8bit", CS_ISO8859_8 },
{ "iso-8859-8", CS_ISO8859_8 },
{ "iso-8859-8-e", CS_ISO8859_8 },
{ "iso-8859-8-i", CS_ISO8859_8 },
{ "iso-latin-5", CS_ISO8859_9 },
{ "iso-8859-9", CS_ISO8859_9 },
{ "latin-5", CS_ISO8859_9 },
{ "chinese-big5", CS_BIG5 },
{ "big5", CS_BIG5 },
{ "cn-big5", CS_BIG5 },
{ "cp437", CS_CP437 },
{ "cp850", CS_CP850 },
{ "cp866", CS_CP866 },
{ "cp1250", CS_CP1250 },
{ "cp1251", CS_CP1251 },
{ "cp1253", CS_CP1253 },
{ "cp1257", CS_CP1257 },
{ "japanese-iso-8bit", CS_EUC_JP },
{ "euc-japan-1990", CS_EUC_JP },
{ "euc-japan", CS_EUC_JP },
{ "euc-jp", CS_EUC_JP },
{ "iso-2022-jp", CS_ISO2022_JP },
{ "junet", CS_ISO2022_JP },
{ "korean-iso-8bit", CS_EUC_KR },
{ "euc-kr", CS_EUC_KR },
{ "euc-korea", CS_EUC_KR },
{ "iso-2022-kr", CS_ISO2022_KR },
{ "korean-iso-7bit-lock", CS_ISO2022_KR },
{ "mac-roman", CS_MAC_ROMAN },
{ "cyrillic-koi8", CS_KOI8_R },
{ "koi8-r", CS_KOI8_R },
{ "koi8", CS_KOI8_R },
{ "japanese-shift-jis", CS_SHIFT_JIS },
{ "shift_jis", CS_SHIFT_JIS },
{ "sjis", CS_SHIFT_JIS },
{ "thai-tis620", CS_ISO8859_11 },
{ "th-tis620", CS_ISO8859_11 },
{ "tis620", CS_ISO8859_11 },
{ "tis-620", CS_ISO8859_11 },
{ "mule-utf-16-be", CS_UTF16BE },
{ "utf-16-be", CS_UTF16BE },
{ "mule-utf-16-le", CS_UTF16LE },
{ "utf-16-le", CS_UTF16LE },
{ "mule-utf-8", CS_UTF8 },
{ "utf-8", CS_UTF8 },
{ "vietnamese-viscii", CS_VISCII },
{ "viscii", CS_VISCII },
{ "iso-latin-8", CS_ISO8859_14 },
{ "iso-8859-14", CS_ISO8859_14 },
{ "latin-8", CS_ISO8859_14 },
{ "compound-text", CS_CTEXT },
{ "x-ctext", CS_CTEXT },
{ "ctext", CS_CTEXT },
{ "chinese-hz", CS_HZ },
{ "hz-gb-2312", CS_HZ },
{ "hz", CS_HZ },
};
/*
 * Translate an internal charset id into an Emacs coding system name.
 *
 * The table is searched from the top, so when several names share one
 * id the earliest entry is the one returned. Yields NULL if no entry
 * carries the requested id.
 */
const char *charset_to_emacsenc(int charset)
{
    int idx = 0;

    while (idx < (int)lenof(emacsencs)) {
        if (emacsencs[idx].charset == charset)
            return emacsencs[idx].name;
        idx++;
    }

    return NULL;                       /* not found */
}
/*
 * Translate an Emacs coding system name into an internal charset id.
 *
 * Each table entry is compared against the whole of `name`, ignoring
 * case. Returns the id of the first matching entry, or CS_NONE when
 * no entry matches or when `name` is a null pointer.
 */
int charset_from_emacsenc(const char *name)
{
    int i;

    /*
     * charset_to_emacsenc() signals "not found" by returning NULL, so
     * a null pointer can plausibly arrive here. Report it as an
     * unknown name rather than dereferencing it.
     */
    if (!name)
        return CS_NONE;

    for (i = 0; i < (int)lenof(emacsencs); i++) {
        const char *p = name;
        const char *q = emacsencs[i].name;

        /*
         * Walk both strings while their case-folded characters agree.
         * The casts to unsigned char keep the tolower() argument in
         * the range <ctype.h> requires even where plain char is
         * signed.
         */
        while (*p &&
               tolower((unsigned char)*p) == tolower((unsigned char)*q)) {
            p++;
            q++;
        }

        /* A match requires both strings to have ended together. */
        if (!*p && !*q)
            return emacsencs[i].charset;
    }

    return CS_NONE;                    /* not found */
}