Skip to content

Commit

Permalink
Added ligatures for letters represented by Unicode character sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
irori committed Jun 23, 2024
1 parent d080938 commit ccbfe5f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 6 deletions.
14 changes: 14 additions & 0 deletions converter/charset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def __init__(self, plane):
raise ValueError('Invalid JIS plane %d' % plane)
self.plane = plane
self.decoder = codecs.getdecoder('euc_jis_2004')
self.encoder = codecs.getencoder('euc_jis_2004')

def unicode(self, cp):
# Convert JIS to EUC-JIS-2004 and then Unicode
Expand All @@ -25,6 +26,19 @@ def unicode(self, cp):
except UnicodeDecodeError:
return None

def decompose(self, ustr):
    """Return space-separated glyph names for the characters of ``ustr``.

    Every character is encoded on its own with the EUC-JIS-2004 encoder
    and named after what the resulting bytes look like:

    * two bytes, both >= 0xA1           -> ``jis1-RR-CC``
    * 0x8F followed by two such bytes   -> ``jis2-RR-CC``
    * anything else (a single byte, a 0x8E-prefixed pair, or a character
      the encoder rejects)              -> ``uXXXX`` (hex code point)

    ``RR``/``CC`` are the byte values minus 0xA0, zero-padded to two digits.

    Args:
        ustr: string whose characters are to be named one by one.

    Returns:
        The glyph names joined by single spaces; ``''`` for an empty string.
    """
    names = []
    for u in ustr:
        try:
            euc, _ = self.encoder(u)
        except UnicodeEncodeError:
            # Not representable in EUC-JIS-2004: handled by the uXXXX branch.
            euc = b''

        # Pick the name from the shape of the encoded bytes rather than from
        # self.plane: indexing by self.plane raised IndexError for shorter
        # encodings and produced negative row numbers for prefixed ones.
        if len(euc) == 2 and euc[0] >= 0xa1 and euc[1] >= 0xa1:
            names.append(f'jis1-{euc[0] - 0xa0:02}-{euc[1] - 0xa0:02}')
        elif (len(euc) == 3 and euc[0] == 0x8f
              and euc[1] >= 0xa1 and euc[2] >= 0xa1):
            names.append(f'jis2-{euc[1] - 0xa0:02}-{euc[2] - 0xa0:02}')
        else:
            names.append(f'u{ord(u):04X}')
    return ' '.join(names)


def codeconv(charset_registry, charset_encoding):
if re.match(r'JISX\d+(\.\d+)?', charset_registry, flags=re.IGNORECASE):
Expand Down
5 changes: 5 additions & 0 deletions converter/charset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,8 @@ def test_plane2(self):

self.assertEqual(unmapped, 9)
self.assertEqual(len(unicode_to_jis), 2436)

def test_decompose(self):
    # Each input sequence is paired with the glyph-name string that
    # decompose() is expected to return for a plane-1 converter.
    jis_plane1 = charset.JIS(1)
    expectations = (
        # First code point gets a jis1 name, the second a uXXXX name.
        ('\u304b\u309a', 'jis1-04-11 u309A'),
        # Both code points get jis1 names.
        ('\u02e5\u02e9', 'jis1-11-64 jis1-11-68'),
    )
    for sequence, glyph_names in expectations:
        self.assertEqual(glyph_names, jis_plane1.decompose(sequence))
24 changes: 18 additions & 6 deletions converter/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,19 @@ def create_ufo(fonts, limit=None):
fonts[0].set_ufo_metrics(ufo.info)

vert_feature = []
liga_feature = []

count = 0
for font in fonts:
for g in font.glyphs():
if len(g.unicode) > 1:
print('Cannot convert unicode sequence %s' % g.unicode, file=sys.stderr)
continue

ufo_glyph = ufo.newGlyph(g.name())
ufo_glyph.unicodes = charset.variants(ord(g.unicode))

if len(g.unicode) == 1:
ufo_glyph.unicodes = charset.variants(ord(g.unicode))
else:
glyph_seq = font.codeconv.decompose(g.unicode)
liga_feature.append(' sub %s by %s;' % (glyph_seq, g.name()))

ufo_glyph.width = font.width
ufo_glyph.height = font.ascent - font.descent
draw(g, ufo_glyph)
Expand All @@ -70,12 +73,21 @@ def create_ufo(fonts, limit=None):
draw(vg, ufo_vglyph)
vert_feature.append(' sub %s by %s;' % (g.name(), vg.name()))

if g.unicode == '\u309c': # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
# Add COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK which is used in ligatures.
u309a = ufo.insertGlyph(ufo_glyph, 'u309A')
u309a.unicode = 0x309a

count += 1
if limit and count >= limit:
break

features = ''
if len(vert_feature) > 0:
ufo.features.text = 'feature vert {\n' + '\n'.join(vert_feature) + '\n} vert;'
features += 'feature vert {\n' + '\n'.join(vert_feature) + '\n} vert;\n'
if len(liga_feature) > 0:
features += 'feature liga {\n' + '\n'.join(liga_feature) + '\n} liga;\n'
ufo.features.text = features

print('%d glyphs converted' % count)
return ufo
Expand Down

0 comments on commit ccbfe5f

Please sign in to comment.