-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert.c
30 lines (28 loc) · 1.08 KB
/
convert.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
// Simple C source code that converts a Unicode code point to a UTF-8 string.
/*
 * Encode the Unicode code point `ch` as a NUL-terminated UTF-8 string.
 *
 * Returns a pointer into a function-local static buffer: the contents are
 * overwritten by the next call and must not be freed by the caller.
 *
 * Code points that have no well-formed UTF-8 encoding (the surrogate range
 * U+D800..U+DFFF and anything above U+10FFFF) are encoded as U+FFFD
 * REPLACEMENT CHARACTER instead of emitting malformed bytes.
 *
 * ch == 0 yields an empty string, because the encoded NUL byte is
 * indistinguishable from the terminator.
 */
const char *u8ch_tostr(unsigned ch){
    static unsigned char buf[5]; // every utf-8 character is <=4 bytes, plus the terminator

    // The encoded bytes are right-aligned in buf so the terminator always
    // lives at buf[4]; the returned pointer marks where the sequence starts.
    buf[4] = 0;

    // Reject values UTF-8 cannot represent; without this the lead byte of the
    // 4-byte form overflows and surrogates leak into the output.
    if (ch > 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
        ch = 0xFFFD;
    }

    if (ch < 0x80) { // ascii is a subset of utf-8
        buf[3] = (unsigned char)ch;
        return (const char *)(buf + 3);
    }
    if (ch < 0x800) { // code points up to 0x7FF are 2 bytes: 110xxxxx 10xxxxxx
        buf[3] = (unsigned char)(0x80 | (ch & 0x3F));
        buf[2] = (unsigned char)(0xC0 | (ch >> 6));
        return (const char *)(buf + 2);
    }
    if (ch < 0x10000) { // code points up to 0xFFFF are 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        buf[3] = (unsigned char)(0x80 | (ch & 0x3F));
        buf[2] = (unsigned char)(0x80 | ((ch >> 6) & 0x3F));
        buf[1] = (unsigned char)(0xE0 | (ch >> 12));
        return (const char *)(buf + 1);
    }
    // everything else (up to 0x10FFFF) is 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    buf[3] = (unsigned char)(0x80 | (ch & 0x3F));
    buf[2] = (unsigned char)(0x80 | ((ch >> 6) & 0x3F));
    buf[1] = (unsigned char)(0x80 | ((ch >> 12) & 0x3F));
    buf[0] = (unsigned char)(0xF0 | (ch >> 18));
    return (const char *)buf;
}
---
//Oren Watson (he/him)
//orenwatson@tutanota.com