-
Notifications
You must be signed in to change notification settings - Fork 24
/
hextoc.py
executable file
·270 lines (252 loc) · 11 KB
/
hextoc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#
# FBInk related tool, Copyright (C) 2018-2024 NiLuJe <ninuje@gmail.com>
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Somewhat naive script to build a C array out of Unifont's hex format.
# Tested on Unscii & its fun variants (http://pelulamu.net/unscii/)
# NOTE: You can probably get something working out of BDF fonts, either via gbdfed, bdfe, or Unifont's bdfimplode/unibdf2hex,
# (in order of preference), but if the horizontal resolution is > 8, that may imply code tweaks to handle it right.
# Right now, fontwidth <= 8 means we store an array of uint8_t, for a <= 16xN font an array of uint16_t,
# a <= 32xN one an array of uint32_t, and a <= 64xN one an array of uint64_t ;).
# This script currently handles <= 64, as does FBInk (although, on FBInk's side,
# the uint64_t codepath is currently commented out, since it's unneeded with our current font panel).
# As for the conversion process itself, FontForge + gbdfed + a text editor should handle most common cases just fine ;).
# In case that wasn't clear, width of intermediary values are supported, it's just a bit of a waste of memory ;).
#
##
import os
import re
import sys
# We'll send the C header to stdout, but our C if/else snippet to stderr... ;).
# And then use your shell to handle the I/O redirections, because I'm lazy :D.
# ./hextoc.py unscii > ../fonts/unscii.h 2>> ../fbink_unscii.c
def eprint(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
# NOTE: Turns out Unifont's hex format is *slightly* different from the one used by font8x8...
# I couldn't actually find a human-readable documentation of Unifont's format (http://czyborra.com/unifont/),
# but there is one for font8x8 (in its README).
# Thanks to this side of the documentation, what happened when we fed Unifont's format to the font8x8 renderer
# turned out to start making some sense...
# Long story short: we render glyphs *vertically* mirrored when doing nothing, so the issue lies in the row encoding...
# Turns out, it's as "simple" as doing a bit reversal on each byte (as in 8 bits), so that we get the "mirror" bitmask.
# And boom, magic.
# "Magic" was helped by a lucky Google search, which led me first to: https://stackoverflow.com/q/4245936
# And in turn to https://stackoverflow.com/q/12681945 which has a very Pythonic solution,
# and links to http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious for shit'n giggles.
# Which, believe it or not, I already had bookmarked for some mysterious reason? (I'm going to guess KindleTool related).
def hex2f8(v):
h = int(v, base=16)
# Pythonic approach, which may not require 64bits maths.
#return int('{:08b}'.format(h)[::-1], 2)
# Vintage 1972 magic trick ;)
return ((h * 0x0202020202 & 0x010884422010) % 1023)
# NOTE: c.f., https://stackoverflow.com/q/5333509/
def hex2f16(v):
h = int(v, base=16)
return int(bin(h)[2:].zfill(16)[::-1], 2)
def hex2f32(v):
h = int(v, base=16)
return int(bin(h)[2:].zfill(32)[::-1], 2)
def hex2f64(v):
h = int(v, base=16)
return int(bin(h)[2:].zfill(64)[::-1], 2)
# NOTE: Unifont is not really monospaced. We ideally ought to honor per-glyph metrics,
# but since we can't really do that, we simply seperate single-wide from double-wide glyphs.
# zcat unifont-*.hex.gz | grep -E '^([[:xdigit:]]{4}:)([[:xdigit:]]{32})$' >| unifont-8x16.hex
# zcat unifont-*.hex.gz | grep -E '^([[:xdigit:]]{4}:)([[:xdigit:]]{64})$' >| unifont-16x16.hex
# NOTE: Cozette should be converted to HEX via unibdf2hex, as gbdfed mangles it to oblivion...
# The main culprit appears to be the per-glyph advances,
# which means that the Vector variant will yield far tighter inter-glyph spacing.
# NOTE: The same idea applies to Scientifica Italic, where the Vector variant will yield far more pleasing results,
# thanks to its aggressive use of negative horizontal advances.
# This is the list of fonts we currently process
font_set = {
# name : (file, w, h)
"unscii" : ("unscii-8.hex", 8, 8),
"alt" : ("unscii-8-alt.hex", 8, 8),
"thin" : ("unscii-8-thin.hex", 8, 8),
"fantasy" : ("unscii-8-fantasy.hex", 8, 8),
"mcr" : ("unscii-8-mcr.hex", 8, 8),
"tall" : ("unscii-16.hex", 8, 16),
"block" : ("block.hex", 32, 32),
"leggie" : ("leggie-8x18.hex", 8, 18),
"veggie" : ("leggie-8x16v.hex", 8, 16),
"kates" : ("kates-7x15.hex", 7, 15),
"fkp" : ("fkp-8x16.hex", 8, 16),
"ctrld" : ("ctrld-fixed-8x16r.hex", 8, 16),
"orp" : ("orp-6x12.hex", 6, 12),
"orpb" : ("orp-6x12b.hex", 6, 12),
"orpi" : ("orp-6x12i.hex", 6, 12),
"scientifica" : ("scientifica-5x12.hex", 5, 12),
"scientificab" : ("scientifica-5x12b.hex", 5, 12),
"scientificai" : ("scientifica-7x12i.hex", 7, 12),
"terminus" : ("ter-u16n.hex", 8, 16),
"terminusb" : ("ter-u16b.hex", 8, 16),
"fatty" : ("fatty7x16-iso10646-1.hex", 7, 16),
"spleen" : ("spleen-16x32.hex", 16, 32),
"tewi" : ("tewi-medium-11.hex", 6, 13),
"tewib" : ("tewi-bold-11.hex", 6, 13),
"topaz" : ("TopazPlus-8x16.hex", 8, 16),
"microknight" : ("MicroKnightPlus-8x16.hex", 8, 16),
"vga" : ("iv8x16u.hex", 8, 16),
"unifont" : ("unifont-8x16.hex", 8, 16),
"unifontdw" : ("unifont-16x16.hex", 16, 16),
"cozette" : ("cozette-8x13.hex", 8, 13),
}
# Get the font name from the first arg passed to the script
fontname = len(sys.argv) > 1 and sys.argv[1].lower() or "NULL"
if fontname not in font_set.keys():
print("Unknown font name '{}'!".format(fontname))
print("Available: {}".format(', '.join([k for k in font_set.keys()])))
sys.exit(-1)
# And populate that specific font's settings
fontfile = os.path.dirname(os.path.abspath(__file__)) + "/../fonts/" + font_set[fontname][0]
fontwidth = font_set[fontname][1]
fontheight = font_set[fontname][2]
print("/*")
print("* C Header for use with https://github.com/NiLuJe/FBInk")
print("* Converted from Hex font {}".format(font_set[fontname][0]))
print("* With FBInk's tools/hextoc.py")
print("*/")
print("")
print("#pragma once")
print("")
blocknum = 0
blockcount = 1
blockcp = 0x0
cp = 0x0
prevcp = -42
# Cozette actually goes > 0xFFFF, so handle a proper uint32_t range.
pat_cp = "([0-9a-fA-F]{4,8})"
if fontwidth <= 8:
pat_rows = "([0-9a-fA-F]{2})" * fontheight
elif fontwidth <= 16:
pat_rows = "([0-9a-fA-F]{4})" * fontheight
elif fontwidth <= 32:
pat_rows = "([0-9a-fA-F]{8})" * fontheight
elif fontwidth <= 64:
pat_rows = "([0-9a-fA-F]{16})" * fontheight
else:
print("Unsupported font width (Must be <= 64)!")
exit(-1)
fmt = re.compile(r"^{}:{}$".format(pat_cp, pat_rows))
with open(fontfile, "r") as f:
for line in f:
m = fmt.match(line)
if m:
cp = m.group(1)
hcp = int(cp, base=16)
if hcp == prevcp + 0x1:
blockcount += 1
else:
if blocknum > 0:
print("}}; // {}".format(blockcount))
print("")
if blocknum == 1:
if fontwidth <= 8:
eprint("static const unsigned char*")
elif fontwidth <= 16:
eprint("static const uint16_t*")
elif fontwidth <= 32:
eprint("static const uint32_t*")
elif fontwidth <= 64:
eprint("static const uint64_t*")
else:
exit(-1)
eprint(" {}_get_bitmap(uint32_t codepoint)".format(fontname))
eprint("{")
if blockcp > 0 :
if blockcp == prevcp:
eprint("\tif (codepoint == {:#04x}u) {{".format(blockcp))
eprint("\t\treturn {}_block{}[0];".format(fontname, blocknum))
else:
eprint("\tif (codepoint >= {:#04x}u && codepoint <= {:#04x}u) {{".format(blockcp, prevcp))
eprint("\t\treturn {}_block{}[codepoint - {:#04x}u];".format(fontname, blocknum, blockcp))
else:
if prevcp == 0:
eprint("\tif (codepoint == {:#04x}u) {{".format(prevcp))
else:
eprint("\tif (codepoint <= {:#04x}u) {{".format(prevcp))
eprint("\t\treturn {}_block{}[codepoint];".format(fontname, blocknum))
else:
if blockcp == prevcp:
eprint("\t}} else if (codepoint == {:#04x}u) {{".format(blockcp))
eprint("\t\treturn {}_block{}[0];".format(fontname, blocknum))
else:
eprint("\t}} else if (codepoint >= {:#04x}u && codepoint <= {:#04x}u) {{".format(blockcp, prevcp))
eprint("\t\treturn {}_block{}[codepoint - {:#04x}u];".format(fontname, blocknum, blockcp))
blocknum += 1
blockcount = 1
blockcp = hcp
if fontwidth <= 8:
print("static const unsigned char {}_block{}[][{}] = {{".format(fontname, blocknum, fontheight))
elif fontwidth <= 16:
print("static const uint16_t {}_block{}[][{}] = {{".format(fontname, blocknum, fontheight))
elif fontwidth <= 32:
print("static const uint32_t {}_block{}[][{}] = {{".format(fontname, blocknum, fontheight))
elif fontwidth <= 64:
print("static const uint64_t {}_block{}[][{}] = {{".format(fontname, blocknum, fontheight))
else:
exit(-1)
print(u"\t{", end='')
if fontwidth <= 8:
for i in range(fontheight):
print(u" {:#04x}".format(hex2f8(m.group(i+2))), end='' if i+1 == fontheight else ',')
elif fontwidth <= 16:
for i in range(fontheight):
print(u" {:#06x}".format(hex2f16(m.group(i+2))), end='' if i+1 == fontheight else ',')
elif fontwidth <= 32:
for i in range(fontheight):
print(u" {:#010x}".format(hex2f32(m.group(i+2))), end='' if i+1 == fontheight else ',')
elif fontwidth <= 64:
for i in range(fontheight):
print(u" {:#018x}".format(hex2f64(m.group(i+2))), end='' if i+1 == fontheight else ',')
else:
exit(-1)
print(u" }},\t// U+{} ({})".format(cp, chr(hcp) if hcp >= 0x20 else "ESC"))
prevcp = hcp
print("}}; // {}".format(blockcount))
print("")
# Handle single block fonts
if blocknum == 1:
if fontwidth <= 8:
eprint("static const unsigned char*")
elif fontwidth <= 16:
eprint("static const uint16_t*")
elif fontwidth <= 32:
eprint("static const uint32_t*")
elif fontwidth <= 64:
eprint("static const uint64_t*")
else:
exit(-1)
eprint(" {}_get_bitmap(uint32_t codepoint)".format(fontname))
eprint("{")
if blockcp > 0:
if blockcp == prevcp:
eprint("\tif (codepoint == {:#04x}u) {{".format(blockcp))
eprint("\t\treturn {}_block{}[0];".format(fontname, blocknum))
else:
eprint("\tif (codepoint >= {:#04x}u && codepoint <= {:#04x}u) {{".format(blockcp, prevcp))
eprint("\t\treturn {}_block{}[codepoint - {:#04x}u];".format(fontname, blocknum, blockcp))
else:
if prevcp == 0:
eprint("\tif (codepoint == {:#04x}u) {{".format(prevcp))
else:
eprint("\tif (codepoint <= {:#04x}u) {{".format(prevcp))
eprint("\t\treturn {}_block{}[codepoint];".format(fontname, blocknum))
else:
# Otherwise, don't forget the final block ;)
if blockcp == prevcp:
eprint("\t}} else if (codepoint == {:#04x}u) {{".format(blockcp))
eprint("\t\treturn {}_block{}[0];".format(fontname, blocknum))
else:
eprint("\t}} else if (codepoint >= {:#04x}u && codepoint <= {:#04x}u) {{".format(blockcp, prevcp))
eprint("\t\treturn {}_block{}[codepoint - {:#04x}u];".format(fontname, blocknum, blockcp))
eprint("\t} else {")
eprint('\t\tWARN("Codepoint U+%04X (%s) is not covered by this font", codepoint, u8_cp_to_utf8(codepoint));')
eprint("\t\treturn {}_block1[0];".format(fontname))
eprint("\t}")
eprint("}")
eprint("")