-
Notifications
You must be signed in to change notification settings - Fork 0
/
digitgrouper.py
471 lines (407 loc) · 19.3 KB
/
digitgrouper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
#!/usr/bin/env python3
import argparse
import fontforge
import re
import psMat
# Characters that make up decimal and hexadecimal numbers.
DECIMAL_LIST = '0123456789'
HEXADECIMAL_LIST = DECIMAL_LIST + 'abcdefABCDEF'

# (script tag, language) pairs that every feature below is registered for.
# NOTE: each language entry is the plain string 'dflt', not a one-element
# tuple.
SCRIPTS = tuple(
    (script_tag, 'dflt')
    for script_tag in ('DFLT', 'latn', 'cyrl', 'grek', 'kana')
)

# One (feature tag, scripts) pair per selectable grouping mode.
MAIN_FEATURE, COMMA_FEATURE, APSTR_FEATURE, DOT_FEATURE = (
    (feature_tag, SCRIPTS)
    for feature_tag in ('dgsp', 'dgco', 'dgap', 'dgdo')
)
ALWAYS_ON_FEATURE = ('calt', SCRIPTS)

# Feature lists handed to the lookups that implement each behaviour.
ALL_MODES = (MAIN_FEATURE, COMMA_FEATURE, APSTR_FEATURE, DOT_FEATURE)
HEXADECIMAL_MODE = (('dghx', SCRIPTS),)
DECIMAL_COMMA_MODE = (('dgdc', SCRIPTS),)
def collect_equivalents(font, basis='0123456789', use_gsubs=False):
    """Return the set of glyph names that stand for the characters in *basis*.

    Each character is looked up in *font* by code point and its glyph name
    is collected.  When *use_gsubs* is true, the targets of that glyph's
    'Substitution', 'AltSubs' and 'MultSubs' entries (as reported by
    ``glyph.getPosSub('*')``) are collected as well.
    """
    substitution_kinds = ('Substitution', 'AltSubs', 'MultSubs')
    names = set()
    for char in basis:
        glyph = font[ord(char)]
        names.add(glyph.glyphname)
        if not use_gsubs:
            continue
        # Entry layout used here: index 1 is the kind, index 2 onwards are
        # the replacement glyph names.
        # TODO: should recurse, maybe...
        for entry in glyph.getPosSub('*'):
            if entry[1] in substitution_kinds:
                names.update(entry[2:])
    return names
def find_first(font, chars):
    """Return the glyph for the first character of *chars* present in *font*.

    Characters are tried in order by code point; ``None`` is returned when
    none of them is in the font.
    """
    codepoints = map(ord, chars)
    return next((font[cp] for cp in codepoints if cp in font), None)
def find_gap_size(font, gap_size):
    """Work out the width to use for the digit-group separator.

    *gap_size* may be a positive integer (or a string holding one), which is
    returned unchanged, or a string of candidate characters; the width of
    the first candidate found in *font* is used.  An empty or missing value
    falls back to thin space, comma, dot and space.

    If none of the requested characters exists, the default candidates are
    tried, and failing that a third of the width of '0' is returned, rather
    than failing on a string/int comparison.

    Returns an integer-like width taken from the font's glyph metrics.
    """
    default_chars = '\N{THIN SPACE},. '

    # An explicit positive number wins outright.
    try:
        explicit = int(gap_size)
    except (TypeError, ValueError):
        explicit = 0
    if explicit > 0:
        return explicit

    use_given = isinstance(gap_size, str) and gap_size
    chars = gap_size if use_given else default_chars
    glyph = find_first(font, chars)
    if glyph is None and chars != default_chars:
        glyph = find_first(font, default_chars)

    size_of_0 = font[ord('0')].width
    if glyph is None:
        # Nothing to measure: use the same default as the monospace case.
        return size_of_0 // 3

    width = glyph.width
    # if suggested gap size is the size of a 0 it's probably
    # a monospaced font, so use the default monospace gap.
    if width > size_of_0 // 2:
        width = size_of_0 // 3
    return width
def new_glyph(font, name, source=None, hshift=None):
    """Create an unencoded glyph called *name* and return it.

    When *source* is given, the new glyph references that glyph (translated
    horizontally by *hshift* when it is non-zero) and takes over the
    source's side bearings and advance width as integers.
    """
    glyph = font.createChar(-1, name)
    if not source:
        return glyph

    if hshift:
        reference_args = (source, psMat.translate(hshift, 0))
    else:
        reference_args = (source,)
    glyph.addReference(*reference_args)

    # NOTE(review): metrics are copied for shifted and unshifted references
    # alike -- confirm that resetting left_side_bearing does not cancel the
    # hshift translation.
    for metric in ('left_side_bearing', 'right_side_bearing', 'width'):
        setattr(glyph, metric, int(getattr(font[source], metric)))
    return glyph
def resize_glyph(font, name, width, cls=None):
    """Set the advance width of glyph *name* to *width*.

    Half of the width change (floor division) is taken off the left side
    bearing before the new width is applied.  When *cls* is truthy it is
    also stored as the glyph's ``glyphclass``.
    """
    target = font[name]
    half_change = (int(target.width) - width) // 2
    target.left_side_bearing = int(target.left_side_bearing) - half_change
    target.width = width
    if cls:
        target.glyphclass = cls
def slide_glyph(font, name, distance):
    """Add *distance* to the left side bearing of glyph *name*.

    The current bearing is truncated to an integer before the offset is
    added.
    """
    target = font[name]
    current_bearing = int(target.left_side_bearing)
    target.left_side_bearing = current_bearing + distance
def rename_font(font):
    """Tag the font's names with a 'DG' suffix on the base family name.

    The base name starts as ``font.familyname`` and loses trailing words
    until it occurs inside ``font.fontname`` (or has no spaces left).
    Every occurrence of it in the family, full and font names is replaced
    by the base name plus 'DG', and each SFNT name entry that contains the
    old name but not yet the new one gets an updated entry appended.
    """
    base = font.familyname
    # Sometimes things get tangled into the font name that don't belong
    # there. Hopefully this can strip those off.
    while ' ' in base and base not in font.fontname:
        base, _dropped = base.rsplit(' ', 1)
    tagged = base + 'DG'

    for attribute in ('familyname', 'fullname', 'fontname'):
        setattr(font, attribute, getattr(font, attribute).replace(base, tagged))

    for entry in font.sfnt_names:
        text = entry[2]
        if tagged in text or base not in text:
            continue
        font.appendSFNTName(entry[0], entry[1], text.replace(base, tagged))
def patch_a_font(font, monospace, terminal, final_rules, gap_size, shrink_x, shrink_y, move_less=False):
    """Add digit-grouping glyphs and lookups to *font* and return it.

    Helper glyphs (capture markers and separators) are created, then a
    sequence of lookups is added that marks runs of digits, turns every
    n-th marker into a separator and removes the remaining markers.

    Parameters:
        font: font object driven through fontforge's scripting API.
        monospace: when true, separators become zero-width 'mark' glyphs
            and a 'pinch_digits' lookup moves the neighbouring digits;
            otherwise separators are simply resized to the gap size.
        terminal: in monospace mode, move digits by substituting
            pre-shifted glyph copies (GSUB) instead of positioning (GPOS).
        final_rules: when true, existing substitutions are included in the
            digit classes and new lookups are added after the font's last
            existing GSUB lookup.
        gap_size: number or candidate characters, resolved through
            find_gap_size().
        shrink_x, shrink_y: accepted but not used by this function.
        move_less: in monospace mode, selects the alternative rule set.

    Returns:
        The same *font* object, renamed through rename_font().
    """
    font.encoding = 'ISO10646'
    gap_size = find_gap_size(font, gap_size)
    print(f'zero: {font[ord("0")].width}, gap_size: {gap_size}')
    # Capture/avoid markers are references to ordinary letters; they are
    # inserted after digits by the capture rules and removed again later.
    new_glyph(font, 'thsp.capture3', 'z')
    new_glyph(font, 'thsp.capture4', 'y')
    new_glyph(font, 'thsp.capture5', 'x')
    new_glyph(font, 'thsp.avoid', 'v')
    # One separator glyph of each flavour per group size (3, 4 and 5).
    for d in [3,4,5]:
        space = find_first(font, '\N{THIN SPACE} ').glyphname
        comma = find_first(font, ',').glyphname
        apostrophe = find_first(font, "'").glyphname
        dot = find_first(font, '.').glyphname
        new_glyph(font, f'thsp.sep{d}', space)
        new_glyph(font, f'thsp.comma{d}', comma)
        new_glyph(font, f'thsp.apostrophe{d}', apostrophe)
        new_glyph(font, f'thsp.dot{d}', dot)
    dec_group = collect_equivalents(font, '0123456789', final_rules)
    hex_group = dec_group | collect_equivalents(font, 'abcdefABCDEF', final_rules)
    dsep_group = collect_equivalents(font, '.,', final_rules)
    capture_group = ['thsp.capture3','thsp.capture4','thsp.capture5','thsp.avoid']
    separator_group = set()
    for d in [3,4,5]:
        separator_group |= {f'thsp.sep{d}',f'thsp.comma{d}',f'thsp.apostrophe{d}',f'thsp.dot{d}'}
    for gn in separator_group:
        if monospace:
            # Zero-width separators: the 5-digit ones are slid left and the
            # others right by one gap, then the glyph is made a 'mark'.
            adjustment = -gap_size if gn.endswith('5') else gap_size
            slide_glyph(font, gn, adjustment)
            resize_glyph(font, gn, 0, 'mark')
        else:
            resize_glyph(font, gn, gap_size)
    #print(f'decimals: {dec_group}')
    #print(f'hexadecimals: {hex_group}')
    # Named horizontal offsets (fractions of the gap; 'lf' is negative and
    # 'rt' positive) used by the monospace digit-moving rules.
    adjustments = {
        'lf_1_6': -1 * (gap_size // 6),
        'rt_1_6': 1 * (gap_size // 6),
        'lf_1_4': -1 * (gap_size // 4),
        'rt_1_4': 1 * (gap_size // 4),
        'lf_1_2': -1 * (gap_size // 2),
        'rt_1_2': 1 * (gap_size // 2),
        'lf_3_4': -3 * (gap_size // 4),
        'rt_1_3': 1 * (gap_size // 3),
        'rt_2_3': 2 * (gap_size // 3),
        'lf_1_1': -1 * (gap_size // 1),
        'rt_1_1': 1 * (gap_size // 1),
    }
    # Glyph classes that the rule strings below refer to as {name}.
    classes = {
        'dec': dec_group,
        'hex': hex_group,
        'sep3': ['thsp.sep3'],
        'sep4': ['thsp.sep4'],
        'sep5': ['thsp.sep5'],
        'anysep3': ['thsp.sep3','thsp.comma3','thsp.apostrophe3','thsp.dot3'],
        'anysep4': ['thsp.sep4','thsp.comma4','thsp.apostrophe4','thsp.dot4'],
        'anysep5': ['thsp.sep5','thsp.comma5','thsp.apostrophe5','thsp.dot5'],
        'cap3': ['thsp.capture3'],
        'cap4': ['thsp.capture4'],
        'cap5': ['thsp.capture5'],
        'avoid': ['thsp.avoid'],
        'anycap': capture_group,
        'zero': collect_equivalents(font, '0', final_rules),
        'xx': collect_equivalents(font, 'bBoOxX', final_rules),
        'dot': collect_equivalents(font, '.', final_rules),
        'comma': collect_equivalents(font, ',', final_rules),
        'dotsep5': dsep_group | {'thsp.sep5','thsp.comma5','thsp.apostrophe5','thsp.dot5'},
    }
    # Classes for already-adjusted digits: the plain digits by default, or
    # the suffixed glyph copies ('<glyph>.<adjustment>') in terminal mode.
    for name in adjustments.keys():
        classes['hex_'+name] = classes['hex']
        classes['dec_'+name] = classes['dec']
        if terminal:
            classes['hex_'+name] = [ v+'.'+name for v in classes['hex'] ]
            classes['dec_'+name] = [ v+'.'+name for v in classes['dec'] ]
    # Each class rendered as '[ a b c ]' for substitution into rule strings.
    classes_fmt = {
        k: '[ ' + ' '.join(v) + ' ]' for k,v in classes.items()
    }
    # Name of the most recently added lookup (new ones are added after it)
    # and the number of subtables added to it so far.
    curr_lookup = None
    subtable_index = 0
    if final_rules:
        curr_lookup = font.gsub_lookups[-1]
    def new_lookup(name, lu_type, features=()):
        """Add lookup *name* after the current one and make it current."""
        nonlocal curr_lookup, subtable_index
        if curr_lookup:
            font.addLookup(name, lu_type, None, features, curr_lookup)
        else:
            font.addLookup(name, lu_type, None, features)
        curr_lookup = name
        subtable_index = 0
    def new_glyph_rule(name, lu_type, features=()):
        """Add a lookup with one subtable of the same name; return the name."""
        new_lookup(name, lu_type, features)
        font.addLookupSubtable(name, name)
        return name
    def new_ctx_subtable(st_type, rule):
        """Append a contextual subtable to the current lookup.

        *rule* is formatted with the glyph classes; the subtable is named
        '<lookup>-<index>' and placed after the previous one, if any.
        """
        nonlocal curr_lookup, subtable_index, classes_fmt
        name = f'{curr_lookup}-{subtable_index}'
        if subtable_index:
            after = f'{curr_lookup}-{subtable_index-1}'
            font.addContextualSubtable(curr_lookup, name, st_type,
                    rule.format(**classes_fmt), afterSubtable=after)
        else:
            font.addContextualSubtable(curr_lookup, name, st_type,
                    rule.format(**classes_fmt))
        subtable_index += 1
        return name
    def new_coverage(rule):
        """Append a 'coverage' contextual subtable for *rule*."""
        return new_ctx_subtable('coverage', rule)
    def new_rev_coverage(rule):
        """Append a 'reversecoverage' contextual subtable for *rule*."""
        return new_ctx_subtable('reversecoverage', rule)
    # Each lookup is executed in the order they're listed below, but
    # selectively enabled by their assigned font features. Within each lookup,
    # the first matching subtable ends the search and advances to the next
    # character in the string.
    #
    # What are called 'glyph rules' here are substitutions which are described
    # within the glyph rather than within context rules. The lookup and
    # subtable are given the same name so they can be used interchangeably
    # (glyphs reference the subtable, and subtable rules reference the lookup).
    #
    # Broadly, each group of digits is classified by its prefix, in left-to-
    # right order, and that classification is stretched out to the end of the
    # group of digits. Then rules for whole numbers are applied in right-to-
    # left order to form groups of three or four digits depending on the
    # capture type, and a rule for decimals is applied in left-to-right order
    # to form groups of five digits. Finally another rule sweeps away the
    # classification markers.
    #
    # After that a couple of extra tweaks are applied to use different
    # characters for the thousand separators if needed.
    # Rules to mark any digit in a string
    new_glyph_rule('capture_3digit', 'gsub_multiple')
    new_glyph_rule('capture_4digit', 'gsub_multiple')
    new_glyph_rule('capture_5digit', 'gsub_multiple')
    new_glyph_rule('capture_avoid', 'gsub_multiple')
    # And a rule to remove those marks
    new_glyph_rule('release_digit', 'gsub_ligature')
    new_glyph_rule('nop', 'gsub_single')
    for g in hex_group:
        # Arguments for gsub_multiple and gsub_ligature rules look the same,
        # but they have opposing substitution rules.
        font[g].addPosSub('capture_3digit', (g, 'thsp.capture3'))
        font[g].addPosSub('capture_4digit', (g, 'thsp.capture4'))
        font[g].addPosSub('capture_5digit', (g, 'thsp.capture5'))
        font[g].addPosSub('capture_avoid', (g, 'thsp.avoid'))
        for cap in capture_group:
            font[g].addPosSub('release_digit', (g, cap))
    # A rule to insert separator over capture.
    new_glyph_rule('insert_separator', 'gsub_single')
    font['thsp.capture3'].addPosSub('insert_separator', 'thsp.sep3')
    font['thsp.capture4'].addPosSub('insert_separator', 'thsp.sep4')
    font['thsp.capture5'].addPosSub('insert_separator', 'thsp.sep5')
    # Capture hexadecimal generously if configured to do so
    new_lookup('capture_as_hex', 'gsub_contextchain', HEXADECIMAL_MODE)
    new_coverage('{hex} | {hex} @<capture_4digit> | {hex} {hex} {hex}')
    new_lookup('comma_as_decimal', 'gsub_contextchain', DECIMAL_COMMA_MODE)
    # if it's `n,nnnn` that's a decimal number
    new_coverage( '{dec} {comma} | {dec} @<capture_5digit> | {dec} {dec} {dec} {dec}')
    new_coverage('{cap3} {comma} | {dec} @<capture_5digit> | {dec} {dec} {dec} {dec}')
    # otherwise if it's `,nnnnn` it's not clear what it is, so avoid it.
    new_coverage( '{comma} | {dec} @<capture_avoid> | {dec} {dec} {dec}')
    # and we switch off support for decimal dot, while we're here.
    new_coverage( '{cap3} {dot} | {dec} @<capture_avoid> | {dec} {dec} {dec}')
    new_coverage( '{dec} {dot} | {dec} @<capture_avoid> | {dec} {dec} {dec}')
    # Captures for all the different digit types
    new_lookup('capture_numbers', 'gsub_contextchain', ALL_MODES)
    # if it's `..nnnnn`, that's probably an integer following a range.
    new_coverage( '{dot} {dot} | {dec} @<capture_3digit> | {dec} {dec} {dec} {dec}')
    # if it's `n.nnnn` that's a decimal number
    new_coverage( '{dec} {dot} | {dec} @<capture_5digit> | {dec} {dec} {dec} {dec}')
    new_coverage('{cap3} {dot} | {dec} @<capture_5digit> | {dec} {dec} {dec} {dec}')
    # otherwise if it's `.nnnnn` it's not clear what it is, so avoid it.
    new_coverage( '{dot} | {dec} @<capture_avoid> | {dec} {dec} {dec}')
    ## TODO: consider: excluding `x` in middle of number (is it `XXXxYYY`?)
    # This is already partially-implemented in that the capture will break the
    # `0x` match.
    new_coverage( '{zero} {xx} | {hex} @<capture_4digit> | {hex} {hex} {hex} {hex}')
    new_coverage( '{dec} | {dec} @<capture_3digit> | {dec} {dec} {dec}')
    # avoid doubling up on captures (can happen when using extra features)...
    new_coverage('{anycap} | {hex} @<nop> | {anycap}')
    # and then fill everything following a capture to match that type
    new_coverage('{cap3} | {dec} @<capture_3digit> |')
    new_coverage('{cap4} | {hex} @<capture_4digit> |')
    new_coverage('{cap5} | {dec} @<capture_5digit> |')
    new_coverage('{avoid} | {hex} @<capture_avoid> |')
    # Convert every nth capture into a digit group
    new_lookup('reflow_numbers_rev', 'gsub_reversecchain', ALL_MODES)
    new_rev_coverage('| {cap3} => {sep3} | {dec} {cap3} {dec} {cap3} {dec}')
    new_rev_coverage('| {cap4} => {sep4} | {hex} {cap4} {hex} {cap4} {hex} {cap4} {hex}')
    new_lookup('reflow_numbers_fwd', 'gsub_contextchain', ALL_MODES)
    new_coverage('{dec} {cap5} {dec} {cap5} {dec} {cap5} {dec} {cap5} {dec} | {cap5} @<insert_separator> | {dec}')
    # Remove unused capture markers
    new_lookup('release_numbers', 'gsub_contextchain', ALL_MODES)
    new_coverage('| {hex} @<release_digit> {anycap} |')
    # convert separators into commas or apostrophes (TBD, dots?)
    new_glyph_rule('comma_separator', 'gsub_single', (COMMA_FEATURE,))
    new_glyph_rule('apostrophe_separator', 'gsub_single', (APSTR_FEATURE,))
    new_glyph_rule('dot_separator', 'gsub_single', (DOT_FEATURE,))
    for d in [3,4,5]:
        glyph = font[f'thsp.sep{d}']
        glyph.addPosSub('comma_separator', f'thsp.comma{d}')
        glyph.addPosSub('apostrophe_separator', f'thsp.apostrophe{d}')
        glyph.addPosSub('dot_separator', f'thsp.dot{d}')
    if monospace:
        # I believe it's legal to fold all the lookups onto one line, but
        # fontforge doesn't seem to support it, so this is unrolled. It might
        # be that the multi-lookup form of the table was always split into
        # separate entries anyway. I do not know.
        # TODO: user-selectable decision, here; including a third "away from
        # separator" mode.
        if move_less:
            rules = [
                '{dotsep5} | {dec} @<rt_1_2> | {dec} {dec} {dec} {dec} {anysep5}',
                '{dotsep5} {dec_rt_1_2} | {dec} @<rt_1_4> | {dec} {dec} {dec} {anysep5}',
                # middle digit doesn't move
                '{dotsep5} {dec_rt_1_2} {dec_rt_1_4} {dec} | {dec} @<lf_1_4> | {dec} {anysep5}',
                '{dotsep5} {dec_rt_1_2} {dec_rt_1_4} {dec} {dec_lf_1_4} | {dec} @<lf_1_2> | {anysep5}',
                '{anysep4} | {hex} @<rt_1_2> | {hex} {hex} {hex}',
                '{anysep4} {hex_rt_1_2} | {hex} @<rt_1_6> | {hex} {hex}',
                '{anysep4} {hex_rt_1_2} {hex_rt_1_6} | {hex} @<lf_1_6> | {hex}',
                '{anysep4} {hex_rt_1_2} {hex_rt_1_6} {hex_lf_1_6} | {hex} @<lf_1_2> |',
                '{anysep3} | {dec} @<rt_1_2> | {dec} {dec}',
                # middle digit doesn't move
                '{anysep3} {dec} {dec} | {dec} @<lf_1_2> |',
            ]
        else:
            rules = [
                # first digit doesn't move
                '{dotsep5} {dec} | {dec} @<lf_1_4> | {dec} {dec} {dec} {anysep5}',
                '{dotsep5} {dec} {dec_lf_1_4} | {dec} @<lf_1_2> | {dec} {dec} {anysep5}',
                '{dotsep5} {dec} {dec_lf_1_4} {dec_lf_1_2} | {dec} @<lf_3_4> | {dec} {anysep5}',
                '{dotsep5} {dec} {dec_lf_1_4} {dec_lf_1_2} {dec_lf_3_4} | {dec} @<lf_1_1> | {anysep5}',
                '{anysep4} | {hex} @<rt_1_1> | {hex} {hex} {hex}',
                '{anysep4} {hex_rt_1_1} | {hex} @<rt_2_3> | {hex} {hex}',
                '{anysep4} {hex_rt_1_1} {hex_rt_2_3} | {hex} @<rt_1_3> | {hex}',
                # last digit doesn't move
                '{anysep3} | {dec} @<rt_1_1> | {dec} {dec}',
                '{anysep3} {dec_rt_1_1} | {dec} @<rt_1_2> | {dec}',
                # last digit doesn't move
            ]
        # Work out which glyph class each adjustment is actually applied to
        # by the selected rules.  The result is not consumed yet.
        useful_adjustments = {}
        pat = re.compile(r'{([a-z_0-9]+)} @<([a-z_0-9]+)>')
        for r in rules:
            match = pat.search(r)
            name, digits = match.group(2, 1)
            digits = set(classes[digits])
            digits |= useful_adjustments.get(name, set())
            useful_adjustments[name] = digits
        # TODO: use above result
        if terminal:
            # GSUB flavour: one shifted copy of every digit per adjustment,
            # reached through a single-substitution lookup of that name.
            for name, adjustment in adjustments.items():
                new_glyph_rule(name, 'gsub_single')
                for g in hex_group:
                    adjusted_g = g+'.'+name
                    new_glyph(font, adjusted_g, g, adjustment)
                    font[g].addPosSub(name, adjusted_g)
            new_lookup('pinch_digits', 'gsub_contextchain', ALL_MODES)
        else:
            # switch to gpos
            # (the GPOS lookups are not positioned relative to a GSUB lookup)
            curr_lookup = None
            for name, adjustment in adjustments.items():
                new_glyph_rule(name, 'gpos_single')
                for g in hex_group:
                    font[g].addPosSub(name, adjustment, 0, 0, 0)
            new_lookup('pinch_digits', 'gpos_contextchain', ALL_MODES)
        # The selected rules become the subtables of 'pinch_digits'.
        for r in rules:
            new_ctx_subtable('coverage', r)
    rename_font(font)
    return font
def main(font_list, separate_files=False, always_on=False, **kwargs):
    """Patch every font in the given files and write the result.

    Parameters:
        font_list: sequence of objects with a ``name`` attribute holding a
            font file path (e.g. open files from argparse).
        separate_files: also write '<fullname>.ttf' and '<fullname>.fea'
            for each patched font.
        always_on: additionally register the lookups under the always-on
            feature (``ALWAYS_ON_FEATURE``).
        **kwargs: forwarded unchanged to patch_a_font().

    One patched font is written to 'output.ttf'; several are merged into
    'output.ttc'.

    Raises:
        ValueError: if no font could be found in any of the files.
    """
    global ALL_MODES
    # Guard against adding the feature again when main() is called twice.
    if always_on and ALWAYS_ON_FEATURE not in ALL_MODES:
        ALL_MODES = (*ALL_MODES, ALWAYS_ON_FEATURE)

    results = []
    try:
        for font_file in font_list:
            for font_name in fontforge.fontsInFile(font_file.name):
                font_id = f'{font_file.name}({font_name})'
                font = fontforge.open(font_id)
                results.append(patch_a_font(font, **kwargs))
                if separate_files:
                    font.generate(f'{font.fullname}.ttf')
                    font.generateFeatureFile(f'{font.fullname}.fea')
                    print('saved: ', font.fullname)

        if not results:
            # Previously this surfaced as an IndexError on results[0].
            names = ', '.join(str(f.name) for f in font_list)
            raise ValueError(f'no fonts found in: {names}')

        if len(results) > 1:
            results[0].generateTtc('output.ttc', results[1:],
                    ttcflags=('merge',), layer=results[0].activeLayer)
        else:
            results[0].generate('output.ttf')
    finally:
        # Release the patched fonts even if generation failed.
        for font in results:
            font.close()
def float_or_pct(string):
    """Parse *string* as a float, treating a trailing '%' as a percentage.

    '50%' gives 0.5 and '1.25' gives 1.25.  Surrounding whitespace is
    ignored.

    Raises:
        ValueError: if the text is empty or not a number, so that argparse
            can report it as an invalid argument value.
    """
    text = string.strip()
    # endswith() is safe on an empty string, unlike indexing with [-1].
    if text.endswith('%'):
        return float(text[:-1]) * 0.01
    return float(text)
if __name__ == "__main__":
    # Command-line entry point: every parsed option is passed to main() as a
    # keyword argument, so each option's dest must match a parameter of
    # main() or patch_a_font().
    parser = argparse.ArgumentParser(
        description=('Add font-based digit grouping. ')
    )
    parser.add_argument('font_list', metavar='font', nargs='+',
            type=argparse.FileType('rb'), help='font files to patch')
    parser.add_argument('--monospace', default=False,
            action='store_true',
            help='Squeeze numbers together to fit original spacing')
    parser.add_argument('--terminal', default=False,
            action='store_true',
            help='Use GSUB instead of GPOS rules, creating new glyphs')
    parser.add_argument('--final-rules', default=False,
            action='store_true',
            help='Insert new rules as last GSUB rules, not first.')
    parser.add_argument('--always-on', default=False,
            action='store_true',
            help='List tables under `calt` feature, always on')
    parser.add_argument('--gap-size', type=str, default=",",
            help='size of space for thousand separator, try 300 or ","')
    # Hyphenated spellings match the other options; the original underscore
    # spellings are kept as aliases so existing command lines still work.
    parser.add_argument('--shrink-x', '--shrink_x', dest='shrink_x',
            type=float_or_pct, default=1.0,
            help='horizontal scale to apply to digits being'
                 ' repositioned')
    parser.add_argument('--shrink-y', '--shrink_y', dest='shrink_y',
            type=float_or_pct, default=1.0,
            help='vertical scale to apply to the digits being'
                 ' repositioned')
    parser.add_argument('--separate-files', default=False,
            action='store_true',
            help='Write every font to a separate file.')
    main(**vars(parser.parse_args()))