-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathencode.c
88 lines (67 loc) · 1.44 KB
/
encode.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/**
* libutf8 - `encode.c'
*
* (c) 2013 joseph werle <joseph.werle@gmail.com>
*/
#define _BSD_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <errno.h>
#include "utf8.h"
/**
 * Encodes a NUL-terminated byte string as UTF-8.
 *
 * Every byte of `str' is read as an unsigned value and treated as one
 * code point in the range U+0000..U+00FF:
 *
 *   - 0x00..0x7F are copied through unchanged (1 byte)
 *   - 0x80..0xFF become a 2-byte sequence `110000xx 10xxxxxx'
 *
 * Because a single byte can never hold a surrogate or any code point
 * above U+00FF, no longer sequences are ever produced.
 *
 * The output is sized from the input, so arbitrarily long strings are
 * handled. A non-zero `errno' left over from an earlier, unrelated call
 * does not cause a failure.
 *
 * @param str NUL-terminated input bytes
 * @return a newly allocated NUL-terminated string that the caller must
 *         release with `free()', or `NULL' on failure: `errno' is set
 *         to `EINVAL' when `str' is `NULL' and to `ENOMEM' when the
 *         required size cannot be represented; a failed `malloc()'
 *         also yields `NULL'
 */
char *
utf8_encode (const char *str) {
  // read input as unsigned bytes so values >= 0x80 never go negative
  const unsigned char *src = (const unsigned char *) str;
  // encoded output buffer
  unsigned char *out = NULL;
  // input length in bytes
  size_t len = 0;
  // number of input bytes that need a second output byte
  size_t extra = 0;
  // input index
  size_t i = 0;
  // output index
  size_t n = 0;

  if (NULL == str) {
    errno = EINVAL;
    return NULL;
  }

  // first pass: measure the input and count the 2-byte code points
  for (len = 0; '\0' != src[len]; ++len) {
    if (src[len] > 0x7F) {
      extra++;
    }
  }

  // guard `len + extra + 1' against wrapping around
  if (extra > (size_t) -1 - 1 - len) {
    errno = ENOMEM;
    return NULL;
  }

  out = malloc(len + extra + 1);
  if (NULL == out) {
    return NULL;
  }

  // second pass: emit the encoded bytes
  for (i = 0; i < len; ++i) {
    unsigned char c = src[i];

    if (c <= 0x7F) {
      out[n++] = c;
      continue;
    }

    // lead byte carries the top 2 bits, continuation byte the low 6
    out[n++] = (unsigned char) (0xC0 | (c >> 6));
    out[n++] = (unsigned char) (0x80 | (c & 0x3F));
  }

  out[n] = '\0';

  return (char *) out;
}