From a426d41b004cfc635eeeec96611794f50d7ce930 Mon Sep 17 00:00:00 2001
From: sl2c <akakii.fx@gmail.com>
Date: Sat, 14 Dec 2024 14:53:42 +0400
Subject: [PATCH] v2.6

---
 ccitt.py                     |  37 ++++--
 glyph-lists/my-glyphlist.txt |  18 +++
 pdffont.py                   | 213 +++++++++++++++++++++++++++--------
 pdffontencoding.py           |  43 ++++---
 pdffontglyphmap.py           |   2 +-
 pdfimage.py                  |  60 ++++++++--
 pdfstreameditor.py           |  24 ++--
 7 files changed, 302 insertions(+), 95 deletions(-)

diff --git a/ccitt.py b/ccitt.py
index f1a21f7..8b04184 100644
--- a/ccitt.py
+++ b/ccitt.py
@@ -65,7 +65,7 @@ class Group4Decoder(object):
     MAKEUP_LOW_BLACK_DECODE = {v:(k+1)*64 for k,v in enumerate(MAKEUP_LOW_BLACK_ENCODE)}
 
     MAKEUP_HIGH_ENCODE = [
-        '00000001000','00000001100','00000001001','000000010010','000000010011','000000010100','000000010101','000000010110',
+        '00000001000','00000001100','00000001101','000000010010','000000010011','000000010100','000000010101','000000010110',
         '000000010111','000000011100','000000011101','000000011110','000000011111'
     ]
     MAKEUP_HIGH_DECODE = {v:(k + 28)*64 for k,v in enumerate(MAKEUP_HIGH_ENCODE)}
@@ -83,17 +83,22 @@ def decode(self, data:bytes, Columns:int, EncodedByteAlign:bool = False):
         with ends of lines padded with 0-bits to whole bytes, if necessary.
         '''
 
-        def dump(outBits:str, Columns:int, a0:int, line:int, message:str):
+        MODES = self.MODES_DECODE
+        WHITE, BLACK = 0, 1
+
+        peek = lambda i: inBits[inPos:inPos+i]
+        getBit = lambda color: '0' if color == WHITE else '1'
+
+        def dump(inBits:str, outBits:str, Columns:int, a0:int, line:int, message:str):
             nBytes = (Columns + 7 ) // 8
             if a0 < nBytes * 8:
                 outBits += '0' * (nBytes * 8 - a0)
             from PIL import Image, ImageChops
             pil = Image.frombytes('1',(Columns, line+1), toBytes(outBits))
             ImageChops.invert(pil).save('dump.tif')
-            raise ValueError(message + '\n' + 'salvaged parts of the image written to dump.tif')
-
-        MODES = self.MODES_DECODE
-        WHITE, BLACK = 0, 1
+            message += f'\nline = {line}, a0 = {a0}, peek = {peek(24)}'
+            message += '\nsalvaged parts of the image written to dump.tif'
+            raise ValueError(message)
 
         toBytes = lambda bits: b''.join(int(bits[i:i+8],2).to_bytes(1,'big') for i in range(0,len(bits),8))
 
@@ -101,8 +106,6 @@ def dump(outBits:str, Columns:int, a0:int, line:int, message:str):
         inBits = ''.join(f'{d:08b}' for d in data)
         inPos = 0
         outBits = ''
-        peek = lambda i: inBits[inPos:inPos+i]
-        getBit = lambda color: '0' if color == WHITE else '1'
 
         b = []
         a = []
@@ -132,8 +135,18 @@ def dump(outBits:str, Columns:int, a0:int, line:int, message:str):
                         a0 = b2
                     elif l == 'HOR':
                         # Horizontal mode
-                        M01, inPos = self.get_run_length(inBits, inPos, color)
-                        M12, inPos = self.get_run_length(inBits, inPos, color^1)
+                        try:
+                            M01, inPos = self.get_run_length(inBits, inPos, color)
+                        except Exception: # not a bare except: let KeyboardInterrupt/SystemExit propagate
+                            clr = 'white' if color == WHITE else 'black'
+                            dump(inBits, outBits, Columns, a0, line, f'failed to get 1st ({clr}) run length')
+
+                        try:
+                            M12, inPos = self.get_run_length(inBits, inPos, color^1)
+                        except Exception: # not a bare except: let KeyboardInterrupt/SystemExit propagate
+                            clr = 'black' if color == WHITE else 'white'
+                            dump(inBits, outBits, Columns, a0, line, f'failed to get 2nd ({clr}) run length')
+
                         outBits += getBit(color)*M01 + getBit(color^1)*M12
                         a1, a2 = a0 + M01, a0 + M01 + M12
                         a.append(a1); a.append(a2)
@@ -155,14 +168,14 @@ def dump(outBits:str, Columns:int, a0:int, line:int, message:str):
             if l is None:
 
                 if peek(24) != self.EOFB:
-                    dump(outBits, Columns, a0, line, f'unrecognized bits at line = {line}, a0 = {a0}: {peek(24)}')
+                    dump(inBits, outBits, Columns, a0, line, f'unrecognized bits')
 
                 if res := len(outBits) % 8:
                     outBits += '0'*(8-res)
                 return toBytes(outBits)
 
             if a0 > Columns:
-                dump(outBits, Columns, a0, line, f'extra bits at line = {line}, a0 = {a0}')
+                dump(inBits, outBits, Columns, a0, line, f'extra bits at the end of line')
             
             if a0 == Columns:
                 a0 = -1
diff --git a/glyph-lists/my-glyphlist.txt b/glyph-lists/my-glyphlist.txt
index 91e465b..edc8f4a 100644
--- a/glyph-lists/my-glyphlist.txt
+++ b/glyph-lists/my-glyphlist.txt
@@ -19,6 +19,8 @@ FL107;2014
 Accent;0301
 accent;0301
 bot;22A5
+bar.one;2015
+bar.two;2015
 check;2713
 equaldotleftright;2252
 nobreakspace;00A0
@@ -50,6 +52,7 @@ lessthanorequalangled;2A7D
 greaterthanorequalangled;2A7E
 integralex;23ae
 EurDig;20AC
+zero.slash;0030
 # ============================================== Numbers
 0;0030
 1;0031
@@ -873,20 +876,28 @@ VertBar1;007c
 AE;00C6
 Complex;2102
 Delta;2206
+Delta1;2206
 Gamma;0393
+Gamma1;0393
 Ifractur;2111
 Lambda;039B
+Lambda1;039B
 Natural;2115
 OE;0152
 Omega;2126
+Omega1;2126
 Oslash;00D8
 Phi;03A6
+Phi1;03A6
 Pi;03A0
 Psi;03A8
+Psi1;03A8
 Real;211D
 Rfractur;211C
 Sigma;03A3
+Sigma1;03A3
 Theta;0398
+Theta1;0398
 Upsilon;03A5
 Xi;039E
 Zinteger;2124
@@ -983,8 +994,11 @@ braceleftbig8;007B
 braceleftbig9;007B
 braceleftbigg;007B
 braceleftbt;23A9
+braceleft.bot;23A9
 braceleftmid;23A8
+braceleft.mid;23A8
 bracelefttp;23A7
+braceleft.top;23A7
 braceright;007D
 bracerightBig;007D
 bracerightBigg;007D
@@ -1000,8 +1014,11 @@ bracerightbig8;007D
 bracerightbig9;007D
 bracerightbigg;007D
 bracerightbt;23AD
+braceright.bot;23AD
 bracerightmid;23AC
+braceright.mid;23AC
 bracerighttp;23AB
+braceright.top;23AB
 bracketleftBig;005B
 bracketleftBigg;005B
 bracketleftbig;005B
@@ -1101,6 +1118,7 @@ endash;2013
 epsilon1;1D716
 #epsilon1;03B5
 epsilon;03B5
+equal1;003D
 equivalence;2261
 equivasymptotic;224D
 eta;03B7
diff --git a/pdffont.py b/pdffont.py
index 0a4f5c0..4d4f05c 100755
--- a/pdffont.py
+++ b/pdffont.py
@@ -16,10 +16,11 @@
 from pdfrwx.pdffontcmap import PdfFontCMap
 from pdfrwx.pdffilter import PdfFilter
 from pdfrwx.pdfgeometry import VEC, MAT, BOX
+from pdfrwx.pdfobjects import PdfObjects
 
 # fontLib
 try:
-    from fontTools.ttLib import TTFont
+    from fontTools.ttLib import TTFont, newTable
     from fontTools.ttLib.tables._c_m_a_p import CmapSubtable
     from fontTools.t1Lib import T1Font, writePFB
     from fontTools.cffLib import CFFFontSet
@@ -200,6 +201,16 @@ def is_symbolic(font:PdfDict):
 
         return isSymbolic
 
+    # -------------------------------------------------------------------------------- get_font_name()
+
+    @staticmethod
+    def get_font_name(font:PdfDict):
+        '''
+        Returns font's name: `font.Name` for a Type3 font and `font.BaseFont` for all others.
+        Note that a font may have no name, in which case `None` is returned.
+        '''
+        return font.Name if font.Subtype == '/Type3' else font.BaseFont
+
     # -------------------------------------------------------------------------------- get_font_descriptor()
 
     @staticmethod
@@ -299,7 +310,17 @@ def get_type3_bbox(font:PdfDict):
     @staticmethod
     def get_subtype_string(font:PdfDict):
         '''
-        String representation of the font's Subtype
+        Returns `subtype + suffix` where `subtype` is one of:
+        
+        `/Type1, /TrueType, /CIDFontType0, /CIDFontType2`
+
+        and suffix is:
+        
+        - `'C'` if `FontFile3.Subtype` is `/Type1C` or `/CIDFontType0C`;
+        - `'-OpenType'` if `FontFile3.Subtype` is `/OpenType`;
+        - absent if `FontFile3` font program is absent.
+        
+        See PDF Ref. v1.7 sec. 5.8, table 5.23.
         '''
         suffixes = {'/Type1C':'C', '/CIDFontType0C':'C', '/OpenType':'-OpenType'}
 
@@ -307,13 +328,8 @@ def get_subtype_string(font:PdfDict):
 
         f = font if font.Subtype != '/Type0' else font.DescendantFonts[0]
 
-        prefix = f.Subtype
-
-        fd = f.FontDescriptor
-        if not fd: return prefix
- 
-        suffix = suffixes.get(fd.FontFile3.Subtype) if fd.FontFile3 else ''
-        return prefix + suffix
+        try: return f.Subtype + suffixes.get(f.FontDescriptor.FontFile3.Subtype)
+        except: return f.Subtype
 
     # -------------------------------------------------------------------------------- get_encoding_string()
 
@@ -393,8 +409,7 @@ def read_pfb_info(font:bytes):
         for char in chars.values(): char.draw(NullPen())
 
         # Set the maps
-        # info['gid2gname'] = {chr(i):gname for i,gname in enumerate(chars.keys())}
-        info['gid2gname'] = {chr(i):gname for i,gname in enumerate(t1.font['Encoding']) if gname != '.notdef'}
+        info['gid2gname'] = {i:gname for i,gname in enumerate(t1.font['Encoding']) if gname != '.notdef'}
         info['gname2width'] = {gname:chars[gname].width for gname in chars.keys()}
 
         # Return info
@@ -487,7 +502,7 @@ def read_ttf_otf_info(font:bytes):
             warn(f'failed to get glyphSet from font: {fontName}')
 
         if glyphSet is not None:
-            try: info['gid2gname'] = {chr(ttFont.getGlyphID(gname)):gname for gname in glyphSet}
+            try: info['gid2gname'] = {ttFont.getGlyphID(gname):gname for gname in glyphSet}
             except: warn(f'failed to get gid2gname from font: {fontName}')
             try: info['gname2width'] = {gname:glyphSet[gname].width * z for gname in glyphSet}
             except: warn(f'failed to get from gname2width font: {fontName}')
@@ -500,7 +515,7 @@ def read_ttf_otf_info(font:bytes):
                 if combo(table) == (1,0): info['cmap10'] = m
                 if combo(table) == (3,1): info['cmap30'] = m
 
-            # info['unicode2gname'] = ttFont.getBestCmap()
+            info['unicode2gname'] = ttFont.getBestCmap()
 
         # if re.search('MBBIVI', info['FontName']):
         #     ttFont.save('dump.ttf')
@@ -510,6 +525,37 @@ def read_ttf_otf_info(font:bytes):
         # Return the result
         return info
 
+    # -------------------------------------------------------------------------------- merge_cff_font_sets()
+
+    @staticmethod
+    def merge_cff_font_sets(cffFont1:CFFFontSet, cffFont2:CFFFontSet):
+        '''
+        Attempts to merge two CFF fonts. If the fonts are compatible, cffFont2 is updated with entries
+        from cffFont1, and the function returns True; otherwise, the two fonts are unchanged and
+        the function returns False.
+        '''
+        cs1 = cffFont1[0].CharStrings
+        cs2 = cffFont2[0].CharStrings
+
+        for s in cs1.values(): s.compile()
+        for s in cs2.values(): s.compile()
+
+        # check compatibility
+        if any(gname in cs2.keys() and cs1[gname].bytecode != cs2[gname].bytecode for gname in cs1.keys()):
+            return False
+
+        # check non-zero overlap
+        if not any(gname in cs2.keys() for gname in cs1.keys()):
+            return False
+        
+        # update cffFont2 with entries from cffFont1
+        for gname in cs1.keys():
+            if gname not in cs2.keys():
+                cs2[gname] = cs1[gname]
+                cffFont2[0].charset.append(gname)
+
+        return True
+
     # -------------------------------------------------------------------------------- read_cff_info()
 
     @staticmethod
@@ -542,10 +588,10 @@ def read_cff_info(cff:bytes):
 
         # gid2gname
         if info['ROS']:
-            info['gid2gname'] = {chr(int(gname[3:]) if gname != '.notdef' else 0):gname for gname in chars.keys()}
+            info['gid2gname'] = {(int(gname[3:]) if gname != '.notdef' else 0):gname for gname in chars.keys()}
         else:
             try:
-                info['gid2gname'] = {chr(i):gname for i,gname in enumerate(font.Encoding) if gname != '.notdef'}
+                info['gid2gname'] = {i:gname for i,gname in enumerate(font.Encoding) if gname != '.notdef'}
             except:
                 warn(f'CFF font has no CharStrings: {info["FontName"]}')
                 info['gid2gname'] = {}
@@ -589,7 +635,7 @@ def read_core14_info(fontName:str):
         baseEncodingName = PdfFontCore14.built_in_encoding(fontName)
         if baseEncodingName:
             baseEncoding = PdfFontEncoding(name = baseEncodingName)
-            info['gid2gname'] = {cc:gname[1:] for cc,gname in baseEncoding.cc2glyphname.items()}
+            info['gid2gname'] = {ord(cc):gname[1:] for cc,gname in baseEncoding.cc2glyphname.items()}
 
         # gname2width
         name2width = PdfFontCore14.make_name2width(fontName)
@@ -629,12 +675,15 @@ def fix_ttf_font(font:bytes):
     # -------------------------------------------------------------------------------- print_xml()
 
     @staticmethod
-    def print_xml(cff:CFFFontSet):
+    def cff_to_xml(cff:bytes, filePath:str):
         '''
+        Writes a CFF font program to file.
         '''
         from fontTools.misc.xmlWriter import XMLWriter
-        print('***** toXML()', cff.toXML(XMLWriter('__debug.xml')))
-
+        ttFont = TTFont()
+        cffFont = CFFFontSet()
+        cffFont.decompile(file = BytesIO(cff), otFont = ttFont)
+        cffFont.toXML(XMLWriter(filePath))
 
 # =========================================================================== class PdfFont
 
@@ -706,7 +755,7 @@ def __init__(self,
 
                 self.extract_font_program()
 
-                self.info = self.read_pfb_info() if self.pfb \
+                self.info = PdfFontFile.read_pfb_info(self.pfb) if self.pfb \
                                 else PdfFontFile.read_ttf_otf_info(self.ttf or self.otf) if (self.ttf or self.otf) \
                                 else PdfFontFile.read_cff_info(self.cff) if self.cff \
                                 else PdfFontFile.read_core14_info(self.get_font_name())
@@ -731,7 +780,7 @@ def __init__(self,
         
         # Set font's bounding box
         if self.font.Subtype == '/Type3':
-            self.bbox = self.get_type3_bbox()
+            self.bbox = PdfFontDictFunc.get_type3_bbox(self.font)
         else:
             try: self.bbox = BOX(self.get_font_descriptor().FontBBox)
             except: self.bbox = BOX([0, 0, 1000, 1000])
@@ -798,7 +847,7 @@ def make_encoding(self, cc2gname:dict = None, cc2unicode:dict = None):
             cc2gname = {cc:unicode2gname[ord(u)] for cc,u in cc2unicode.items() if ord(u) in unicode2gname}
         
         if not cc2gname:
-            cc2gname = self.info['gid2gname']
+            cc2gname = {chr(gid):gname for gid, gname in self.info['gid2gname'].items()}
 
  
         encoding = PdfFontEncoding()
@@ -826,26 +875,28 @@ def get_cid_encoding_from_type0_font(self):
         gid2gname = self.info.get('gid2gname')
         if not gid2gname: return None
 
-        # Get cid2gid
-        cid2gid = self.get_cidtogidmap()
+        # Get cid2gid (str -> str)
+        cid2gid = PdfFontDictFunc.get_cidtogidmap(self.font)
         if not cid2gid:
             cid2gid = {cc:cc for cc in self.get_cc2width_from_font()}
 
         # Append cids from CIDSet if it exists
-        CIDSet = self.get_cidset()
+        CIDSet = PdfFontDictFunc.get_cidset(self.font)
         if CIDSet:
             for cid in CIDSet:
                 if cid not in cid2gid: cid2gid[cid] = cid
 
-        # msg(f'{self.get_font_name()}: cc2width --> encoding')
+        if len(cid2gid) == 0:
+            # Last resort: assume an identity CID -> GID map over the GIDs in self.info['gid2gname']
+            cid2gid = {chr(gid):chr(gid) for gid in gid2gname}
 
         # Create cid2gname
-        cid2gname = {cid:gid2gname.get(gid,'.notdef') for cid,gid in sorted(cid2gid.items())}
+        cc2gname = {cid:gid2gname.get(ord(gid),'.notdef') for cid,gid in sorted(cid2gid.items())}
 
         # Create encoding
         encoding = PdfFontEncoding()
         encoding.name = PdfName('CIDEncoding')
-        encoding.cc2glyphname = {cc:PdfName(gname) for cc,gname in cid2gname.items()}
+        encoding.cc2glyphname = {cc:PdfName(gname) for cc,gname in cc2gname.items()}
         encoding.reset_glyphname2cc()
 
         return encoding
@@ -865,11 +916,14 @@ def get_cid2unicode(self, encoding:PdfFontEncoding = None):
 
     # -------------------------------------------------------------------------------- get_cid2width()
 
-    def get_cid2width(self, encoding:PdfFontEncoding = None):
+    def get_cid2width_from_info(self, encoding:PdfFontEncoding = None):
         '''
+        Get a map from CIDs to widths (int -> float) from self.info
         '''
-        cid2gname = {cid:gname[1:] for cid,gname in encoding.cc2glyphname.items()} if encoding \
-                        else {gid:gid for gid in self.info['gid2gname'].items()}
+        if encoding:
+            cid2gname = {ord(cc):gname[1:] for cc,gname in encoding.cc2glyphname.items()}
+        else:
+            cid2gname = self.info['gid2gname']
         gname2width = self.info['gname2width']
         return {cid:gname2width.get(gname,0) for cid,gname in cid2gname.items()}
 
@@ -877,9 +931,9 @@ def get_cid2width(self, encoding:PdfFontEncoding = None):
 
     def make_font_dict(self, encoding:PdfFontEncoding = None, force_CID:bool = False):
 
-        # ................................................................................ get_flags()
+        # ................................................................................ make_flags()
 
-        def get_flags(info:dict):
+        def make_flags(info:dict):
             '''
             Calculates the `Flags` bit field.
             '''
@@ -957,7 +1011,7 @@ def get_flags(info:dict):
 
         elif self.ttf: # Make FontFile2
 
-            fontProgram = PdfFont.fix_ttf_font(self.ttf)
+            fontProgram = PdfFontFile.fix_ttf_font(self.ttf)
  
             FontFile2 = IndirectPdfDict(
                 Length1 = len(fontProgram),
@@ -984,7 +1038,7 @@ def get_flags(info:dict):
         FontDescriptor = IndirectPdfDict(
             Type = PdfName('FontDescriptor'),
             FontName = PdfName(FontName),
-            Flags = get_flags(self.info),
+            Flags = make_flags(self.info),
             # FontBBox = PdfArray(self.info['FontBBox']),
             FontBBox = PdfArray(bbox),
             ItalicAngle = self.info['ItalicAngle'],
@@ -1019,14 +1073,14 @@ def get_flags(info:dict):
                 CIDToGIDMap = None
             else:
                 gname2gid = {gname:gid for gid,gname in self.info['gid2gname'].items()}
-                cid2gid = {cid:gname2gid.get(gname[1:],0) for cid,gname in encoding.cc2glyphname.items()}
-                gids = [ord(cid2gid.get(chr(cid),chr(0))) for cid in range(maxCID + 1)]
+                cid2gid = {ord(cc):gname2gid.get(gname[1:],0) for cc,gname in encoding.cc2glyphname.items()}
+                gids = [cid2gid.get(cid,0) for cid in range(maxCID + 1)]
                 CIDToGIDMap = IndirectPdfDict(
                     stream=py23_diffs.convert_load(b''.join(bytes([gid >> 8, gid & 255]) for gid in gids))
                 )
             
             # Widths
-            W = PdfArray([x for cid,w in self.get_cid2width(encoding).items() for x in [ord(cid),PdfArray([w])]])
+            W = PdfArray([x for cid,w in self.get_cid2width_from_info(encoding).items() for x in [cid,PdfArray([w])]])
 
             # CIDFontSubtype; see PDF Ref. Sec. 5.8, Table 5.23 Embedded font organization for various font types
             CIDFontSubtype = PdfName('CIDFontType2') if otfWithGlyf or self.ttf else PdfName('CIDFontType0')
@@ -1071,7 +1125,7 @@ def get_flags(info:dict):
 
             Differences, FirstChar, LastChar = PdfFontEncoding.cc2glyphname_to_differences(encoding.cc2glyphname)
 
-            Widths = PdfArray([self.get_cid2width(encoding).get(chr(i),0) for i in range(FirstChar, LastChar+1)])
+            Widths = PdfArray([self.get_cid2width_from_info(encoding).get(i,0) for i in range(FirstChar, LastChar+1)])
 
             fontDict = IndirectPdfDict(
                 Type = PdfName('Font'),
@@ -1134,18 +1188,46 @@ def extract_font_program(self):
                 self.otf = FontProgram
             else:
                 raise ValueError(f'invalid FontFile3.Subtype: {subtype}')
+            
+    
+    # -------------------------------------------------------------------------------- save()
+
+    def save(self, basePath:str = None):
+        '''
+        Writes the font program (pfb, ttf, otf or cff) to the file `basePath + '.' + ext`, where `basePath` is
+        the intended file path without the extension; if it's None then `self.name[1:]` is used as `basePath`.
+        Raises ValueError if the font has no font program, or if the font program is empty.
+        '''
+        fontProgram = self.pfb or self.ttf or self.otf or self.cff
+        if not fontProgram:
+            raise ValueError(f'no font program in font: {self.name}')
+        if basePath is None:
+            basePath = self.name[1:]
+        ext = 'pfb' if self.pfb else 'ttf' if self.ttf else 'otf' if self.otf else 'cff'
+
+        # if ext == 'cff':
+        #     PdfFontFile.cff_to_xml(self.cff, basePath + '.xml')
+        #     return
+        with open(basePath + '.' + ext, 'wb') as f:
+            f.write(fontProgram)
 
     # -------------------------------------------------------------------------------- get_font_name()
 
     def get_font_name(self):
         '''
+        This is a wrapper function around `PdfFontDictFunc.get_font_name()` which does some extra work:
+
+        * if `self.font` doesn't have a name, this function will return `f'/T3Font{N}'`, where `N`
+        is a consecutive integer (note: Type3 font type is the only one that is allowed not to have a name);
+        * if the name of `self.font` has been encountered in previous calls to this function, it will
+        return a "versioned" variant of the name `f'{name}-v{N}'`, where `N` is a consecutive integer.
+
+        Altogether, this ensures that names returned for different fonts never coincide.
         '''
         nDuplicates = lambda d, k: d.get(k) or d.update({k:len(d)+1}) or len(d) # duplicates counter
         f = self.font
 
-        result = f.Name if f.Subtype == '/Type3' \
-            else f.DescendantFonts[0].BaseFont if f.Subtype == '/Type0' and f.DescendantFonts != None \
-            else f.BaseFont
+        result = PdfFontDictFunc.get_font_name(f)
 
         if result not in self.__fontNameCache: self.__fontNameCache[result] = {}
         idx = nDuplicates(self.__fontNameCache[result], id(f))
@@ -1294,7 +1376,7 @@ def get_cc2width_from_font(self):
             gid2gname = self.info.get('gid2gname')
             cc2width = {cc:ww for cc,ww in cc2width.items() \
                         if gname2width and self.encoding.cc2glyphname.get(cc, '/None')[1:] in gname2width \
-                            or gid2gname and cc in gid2gname \
+                            or gid2gname and ord(cc) in gid2gname \
                             or gid2gname is None and gname2width is None}
 
         else: # CID fonts
@@ -1328,9 +1410,10 @@ def get_cc2width_from_font(self):
                 for cc in cidset:
                     if cc not in cc2width: cc2width[cc] = defaultWidth
 
-            # elif gid2gname := self.info.get('gid2gname'):
-            #     for cc in gid2gname:
-            #         if cc not in cc2width: cc2width[cc] = defaultWidth
+            if len(cc2width) == 0:
+                gid2gname = self.info.get('gid2gname')
+                if gid2gname:
+                    cc2width = {chr(gid):defaultWidth for gid in gid2gname}
 
 
         # Rescale from font units to document units
@@ -1627,6 +1710,44 @@ def loadFont(self, fontNames:list[str], dirList:list[str], forceCID = False):
         self.fontsCache[nameFound] = font
         return font
 
+# ------------------------------------------------------- get_object_fonts()
+
+def get_object_fonts(xobj:PdfDict,
+                        fontTypes:list[str] = None,
+                        regex:str = None):
+    '''
+    Returns a `{id(font):font}` dictionary of fonts used by `xobj` whose Subtypes match those in the `fontTypes` list.
+    Setting `fontTypes = None` has the same effect as setting it to:
+
+    `['/Type1', '/MMType1', '/TrueType', '/Type3', '/Type0']`.
+
+    Example:
+    
+    `get_object_fonts(pdf, fontTypes = ['/Type3']).`
+    
+    If the `regex` argument is provided, only the fonts whose names match it (ignoring case) are selected. Fonts
+    that have no name are included only if `regex` is `None` or `'.'`.
+    '''
+    cache = set()
+
+    fontName = PdfFontDictFunc.get_font_name
+
+    if fontTypes is None:
+        fontTypes = ['/Type1', '/MMType1', '/TrueType', '/Type3', '/Type0']
+
+    fontFilter = lambda obj: \
+        isinstance(obj, PdfDict) \
+        and obj.Type == PdfName.Font \
+        and obj.Subtype in fontTypes \
+        and (regex in [None, '.'] or fontName(obj) is not None and re.search(regex, fontName(obj), re.IGNORECASE))
+
+    if xobj.pages:
+        objectTuples = [t for page in xobj.pages for t in PdfObjects(page, cache=cache)]
+    else:
+        objectTuples = PdfObjects(xobj, cache=cache)
+
+    return {id(obj):obj for name, obj in objectTuples if fontFilter(obj)}
+
 # ============================================================================= main()
 
 if __name__ == '__main__':
diff --git a/pdffontencoding.py b/pdffontencoding.py
index c24a278..6000b33 100755
--- a/pdffontencoding.py
+++ b/pdffontencoding.py
@@ -28,29 +28,31 @@ def __init__(self,
         self.baseEncoding = None
 
         gid2gname = fontInfo.get('gid2gname') if fontInfo else None
+        cmap10 = fontInfo.get('cmap10') if fontInfo else None
+        cmap30 = fontInfo.get('cmap30') if fontInfo else None
 
         # self.baseEncoding = '/WinAnsiEncoding'
 
         self.cc2glyphname = {} # A map from character codes (chars) to glyph names
         self.glyphname2cc = {} # A reverse map from glyph names to character codes (chars)
 
+        cc2g = {}
         if name != None:
 
             self.name = name
-            self.cc2glyphname = PdfFontEncodingStandards.get_cc2glyphname(name)
-            if self.cc2glyphname == None: err(f'invalid encoding name: {name}')
+            cc2g = PdfFontEncodingStandards.get_cc2glyphname(name)
+            if cc2g == None: err(f'invalid encoding name: {name}')
 
         if differences != None:
 
             self.name = '[/Differences]'
-            self.cc2glyphname = PdfFontEncoding.differences_to_cc2glyphname(differences)
+            cc2g = PdfFontEncoding.differences_to_cc2glyphname(differences)
 
         if font != None:
 
             assert font.Subtype != '/Type0' # SIMPLE font
 
             self.isType3 = font.Subtype == '/Type3'
-            self.cc2glyphname = {}
 
             fd = font.FontDescriptor
             isEmbedded = fd and (fd.FontFile or fd.FontFile2 or fd.FontFile3)
@@ -74,9 +76,9 @@ def __init__(self,
                         else None
 
                     if self.baseEncoding:
-                        self.cc2glyphname = PdfFontEncodingStandards.get_cc2glyphname(self.baseEncoding)
+                        cc2g = PdfFontEncodingStandards.get_cc2glyphname(self.baseEncoding)
                     elif gid2gname:
-                        self.cc2glyphname = {gid:PdfName(gname) for gid,gname in gid2gname.items()}
+                        cc2g = {chr(gid):PdfName(gname) for gid,gname in gid2gname.items()}
 
                 # PDF Ref. v1.7 sec. 5.5.5:
                 # If the Encoding entry is a dictionary, the table is initialized with the entries from the
@@ -88,7 +90,7 @@ def __init__(self,
 
                 if font.Encoding.Differences != None:
                     differencesMap = PdfFontEncoding.differences_to_cc2glyphname(font.Encoding.Differences)
-                    self.cc2glyphname = self.cc2glyphname | differencesMap
+                    cc2g = cc2g | differencesMap
                     
                 self.name = [font.Encoding.BaseEncoding, '/Differences' if font.Encoding.Differences != None else None]
 
@@ -96,22 +98,29 @@ def __init__(self,
 
                 self.name = font.Encoding if font.Encoding \
                             else PdfFontCore14.built_in_encoding(font.BaseFont) if not isEmbedded \
-                            else '/TrueType' if font.Subtype == '/TrueType' \
+                            else '/Built-In' if font.Subtype == '/TrueType' \
                             else None
 
-                if self.name == '/TrueType':
-                    if cmap30 := fontInfo.get('cmap30'):
-                        self.cc2glyphname = {cc:PdfName(gname) for cc,gname in cmap30.items()}
-                    elif cmap10 := fontInfo.get('cmap10'):
-                        self.cc2glyphname = {cc:PdfName(gname) for cc,gname in cmap10.items()}
+                if self.name == '/Built-In':
+                    # PDF Ref. v1.7 p.432
+                    if not fontInfo:
+                        # fontInfo is None or empty: font program extraction failed or was not requested
+                        pass
+                    elif cmap30:
+                        high = [(ord(cc)>>8) for cc in cmap30]
+                        if not any(all(h==a for h in high) for a in [0x00, 0xf0, 0xf1, 0xf2]):
+                            raise ValueError(f"failed (3,0) cmap range check in a TrueType font: {font.BaseFont}")
+                        cc2g = {chr(ord(cc) & 0xff):PdfName(gname) for cc,gname in cmap30.items()}
+                    elif cmap10:
+                        cc2g = {cc:PdfName(gname) for cc,gname in cmap10.items()}
                     else:
-                        raise ValueError(f'no built-in encoding in a TrueType font: {font.BaseFont}')
+                        raise ValueError(f'no (3,0) or (1,0) cmap in a TrueType font: {font.BaseFont}')
                 elif self.name:
-                    self.cc2glyphname = PdfFontEncodingStandards.get_cc2glyphname(self.name)
+                    cc2g = PdfFontEncodingStandards.get_cc2glyphname(self.name)
                 elif gid2gname:
-                    self.cc2glyphname = {gid:PdfName(gname) for gid,gname in gid2gname.items()}
+                    cc2g = {chr(gid):PdfName(gname) for gid,gname in gid2gname.items()}
 
-        # reset self.glyphname2cc
+        self.cc2glyphname = cc2g
         self.reset_glyphname2cc()
 
     # -------------------------------------------------------------------------------- differences_to_cc2glyphname()
diff --git a/pdffontglyphmap.py b/pdffontglyphmap.py
index 9e44557..902312b 100755
--- a/pdffontglyphmap.py
+++ b/pdffontglyphmap.py
@@ -247,7 +247,7 @@ def reencode_cmap(self, cmap:PdfFontCMap, encoding:PdfFontEncoding, direct=False
     def strip_dot_endings(s:str):
         '''Strips ._, .sc & .cap endings from a string; useful to match variants (small caps etc) of glyph names in glyph lists
         '''
-        s1 = re.sub(r'(\.(_|sc|cap|alt[0-9]*|disp|big|small|ts1|lf|swash))+$','', s)
+        s1 = re.sub(r'(\.(_|sc|cap|alt[0-9]*|vsize[0-9]*|hsize[0-9]*|disp|big|small|ts1|lf|tf|swash))+$','', s)
         s1 = re.sub(r'\\rm', '', s1)
         return s1 if len(s1)>1 else s
 
diff --git a/pdfimage.py b/pdfimage.py
index 8313164..debdf68 100755
--- a/pdfimage.py
+++ b/pdfimage.py
@@ -166,20 +166,34 @@ def create_profile(cs:CS_TYPE):
             ctx = lc.cmsCreateContext(None, None)
             white = lc.cmsCIExyY()
             white.x, white.y, white.Y = x, y, Y
+
             if name == '/Lab':
+
                 profile = lc.cmsCreateLab2Profile(white)
+
             if name == '/CalGray':
+
                 gamma = float(dic.Gamma) if dic.Gamma != None else 1
                 transferFunction = lc.cmsBuildGamma(ctx,gamma)
                 profile = lc.cmsCreateGrayProfile(white, transferFunction)
+
             if name == '/CalRGB':
+
                 primaries = [float(v) for v in dic.Matrix] if dic.Matrix != None else [1,0,0,0,1,0,0,0,1]
-                primaries = [primaries[3*i,3*i+3] for i in range(3)]
+                primaries = [primaries[3*i:3*i+3] for i in range(3)]
                 primaries = [[X/(X+Y+Z),Y/(X+Y+Z),Y] for X,Y,Z in primaries]
                 gamma = [float(v) for v in dic.Gamma] if dic.Gamma != None else [1,1,1]
                 transferFunction = [lc.cmsBuildGamma(ctx, g) for g in gamma]
-                profile = lc.cmsCreateRGBProfile(white, primaries, transferFunction)
+
+                pt = lc.cmsCIExyYTRIPLE()
+                pt.Red = lc.cmsCIExyY(); pt.Red.x, pt.Red.y, pt.Red.Y = primaries[0]
+                pt.Green = lc.cmsCIExyY(); pt.Green.x, pt.Green.y, pt.Green.Y = primaries[1]
+                pt.Blue = lc.cmsCIExyY(); pt.Blue.x, pt.Blue.y, pt.Blue.Y = primaries[2]
+                # pass the packed xyY primaries (pt) together with the white point and the gamma curves
+                profile = lc.cmsCreateRGBProfile(white, pt, transferFunction)
+
             with tempfile.TemporaryDirectory() as tmp:
+
                 T = lambda fileName: os.path.join(tmp, fileName)
                 lc.cmsSaveProfileToFile(profile, T('profile.cms'))
                 icc_profile = open(T('profile.cms'),'rb').read()
@@ -454,10 +468,13 @@ def toBytes(s:str): return s.encode('Latin-1')
                 width, height = int(obj.Width), int(obj.Height)
                 cpp = self.get_cpp()
 
-                bpc_implied = (len(stream) * 8) / (width * height * cpp) if len(stream) > 1 else 1
-                if bpc_implied != self.bpc and int(bpc_implied) == bpc_implied:
-                    warn(f'replacing bad image xobject\'s bpc = {self.bpc} with the implied bpc = {int(bpc_implied)}')
-                    self.bpc = int(bpc_implied)
+                # Sometimes /BitsPerComponent is incorrect
+                bytesPerLine = len(stream) / height
+                if bytesPerLine == int(bytesPerLine):
+                    bpc_implied = (bytesPerLine * 8) / (width * cpp) if len(stream) > 1 else 1
+                    if bpc_implied != self.bpc and int(bpc_implied) == bpc_implied:
+                        warn(f'replacing bad image xobject\'s bpc = {self.bpc} with the implied bpc = {int(bpc_implied)}')
+                        self.bpc = int(bpc_implied)
 
                 array = PdfFilter.unpack_pixels(stream, width, cpp, self.bpc, truncate = True)
                 if array.shape[0] > height:
@@ -614,8 +631,9 @@ def render(self, pdfPage:PdfDict = None, debug:bool = False):
         # Default colorspace
         try:
             cs2cs = {'/DeviceGray':'/DefaultGray', '/DeviceRGB':'/DefaultRGB', '/DeviceCMYK':'/DefaultCMYK'}
-            cs = pdfPage.Resources.ColorSpace[cs2cs[self.ColorSpace]]
-            if debug: msg(f'Page default colorspace: {PdfColorSpace.toStr(self.ColorSpace)} --> {PdfColorSpace.toStr(cs)}')
+            cs = pdfPage.Resources.ColorSpace[cs2cs[self.ColorSpace]] or self.ColorSpace
+            if cs != None and debug:
+                msg(f'Page default colorspace: {PdfColorSpace.toStr(self.ColorSpace)} --> {PdfColorSpace.toStr(cs)}')
         except:
             cs = self.ColorSpace
 
@@ -1067,6 +1085,10 @@ def print_size_change(size_old, size_new):
 
         elif Format == 'JPEG2000':
 
+            if image.get_mode() not in ['L', 'RGB']:
+                msg(f'mode {image.get_mode()} not supported by JPEG2000; skipping')
+                return
+
             assert Q or CR
             if Q: assert 0 < Q <= 100
             if CR: assert CR > 1
@@ -1388,15 +1410,28 @@ def _jp2_write(array:np.ndarray, alpha:np.ndarray, cs:CS_TYPE, Q:int, CR:float):
         '''
         Encode array as a JPEG2000 image.
         '''
-        assert CR or Q
-        CR = int(round(CR)) if CR != None else int(round(2 ** ((100 - Q)/10.0)))
+        if alpha is not None:
+            raise ValueError(f'JPEG2000 encoding with alpha-channel is not implemented')
+
+        # make sure colorspace is either grayscale or RGB
         cpp = PdfColorSpace.get_cpp(cs)
         if cpp not in [1,3]:
             raise ValueError(f'Jp2k: cpp = {cpp} not supported (must be 1 or 3)')
+
+        # determine compression ratio
+        assert CR or Q
+        CR = int(round(CR)) if CR != None else int(round(2 ** ((100 - Q)/10.0)))
+
+        # limit the number of resolutions for small images; never go below 1 (avoids numres = 0 and a negative shift)
+        numres = 6 # the default for large images
+        while numres > 1 and 1 << numres > min(array.shape[:2]):
+            numres -= 1
+
+        # encode
         with tempfile.TemporaryDirectory() as tmp:
             T = lambda fileName: os.path.join(tmp, fileName)
             # can also try: cratios=[CR*4, CR*2, CR]
-            Jp2k(T('encoded.jp2'), array, colorspace='RGB' if cpp == 3 else 'Gray', cratios=[CR])
+            Jp2k(T('encoded.jp2'), array, colorspace='RGB' if cpp == 3 else 'Gray', cratios=[CR], numres=numres)
             
             return open(T('encoded.jp2'), 'rb').read()
 
@@ -1756,7 +1791,7 @@ def modify_image_xobject(image_obj:IndirectPdfDict, pdfPage:PdfDict, options:Pdf
                 modified = True
 
         if options.colorspace:
-            image.render(pdfPage = pdfPage)
+            image.render(pdfPage = pdfPage, debug = options.debug)
             mode = {'cmyk':'CMYK', 'rgb':'RGB', 'gray':'L', 'grey':'L'}.get(options.colorspace.lower())
             msg(f'converting colorspace: {image.get_mode()} --> {mode}')
             modified = image.change_mode(mode, intent)
@@ -1822,6 +1857,7 @@ def getPageRange(s:str):
     ap.add_argument('-output', '-o', type=str, metavar='PATH', help='output PDF file path')
     ap.add_argument('-pages', type=str, metavar='RANGE', help='process selected pages; RANGE = N1[,N2-N3[,..]]')
     ap.add_argument('-dpi', type=float, metavar='N', help='set resolution of input images to DPI')
+    ap.add_argument('-debug', action='store_true', help='turns debugging on')
 
     ap.add_argument('-bitonal', action='store_true', help='convert color/gray images to bitonal using Otsu\'s algorithm')
     ap.add_argument('-auto', action='store_true', help='detect if gray/color images are in fact bitonal and convert them')
diff --git a/pdfstreameditor.py b/pdfstreameditor.py
index 23ad31c..f6314ed 100644
--- a/pdfstreameditor.py
+++ b/pdfstreameditor.py
@@ -35,7 +35,9 @@
 
 class PdfStreamEditor:
 
-    def __init__(self, xobj:PdfDict, glyphMap:PdfFontGlyphMap,
+    def __init__(self,
+                    xobj:PdfDict,
+                    glyphMap:PdfFontGlyphMap = PdfFontGlyphMap(),
                     textOnly:bool=False,
                     graphicsOnly:bool=False,
                     normalize=False,
@@ -87,8 +89,8 @@ def normalize_text_operators(self, tree:list, state:PdfState = None) -> list:
         '''
         if state == None: state = PdfState(self.xobj.inheritable.Resources,
                                            self.glyphMap,
-                                           extractFontProgram=self.extractFontProgram,
-                                           makeSyntheticCmap=self.makeSyntheticCmap)
+                                           extractFontProgram=False,
+                                           makeSyntheticCmap=False)
         result = []
         for leaf in tree:
             cmd, args = leaf[0], leaf[1]
@@ -216,8 +218,14 @@ def recurse(self, recursedFunction:Callable, xobjCache, *args, **kwarg):
         for x in xobjects:
             if id(x) not in xobjCache and x.Subtype == PdfName.Form and x.stream != None:
                 # Creates an editor of the same type as that of any inheriting class
-                editor = type(self)(x, self.glyphMap, textOnly = self.textOnly, graphicsOnly=self.graphicsOnly,
-                                    normalize = self.normalize, debug = self.debug)
+                editor = type(self)(xobj = x,
+                                    glyphMap = self.glyphMap,
+                                    textOnly = self.textOnly,
+                                    graphicsOnly = self.graphicsOnly,
+                                    normalize = self.normalize,
+                                    debug = self.debug,
+                                    extractFontProgram = self.extractFontProgram,
+                                    makeSyntheticCmap = self.makeSyntheticCmap)
                 xobjCache[id(x)] = editor.recurse(recursedFunction, xobjCache, *args, **kwarg)
                 if editor.isModified: self.isModified = True
 
@@ -242,8 +250,8 @@ def flattenImages(self, dpi:float = 300):
         # This state is local to this function
         state = PdfState(resources = self.xobj.inheritable.Resources,
                               glyphMap = self.glyphMap,
-                              extractFontProgram = self.extractFontProgram,
-                              makeSyntheticCmap = self.makeSyntheticCmap)
+                              extractFontProgram = False,
+                              makeSyntheticCmap = False)
 
         res = self.xobj.inheritable.Resources
 
@@ -493,6 +501,8 @@ def processTextFunction(self, xobjCache:dict, tree:list=None, options:dict = {})
                     btText = PdfStreamEditor.chunks_to_text(btChunks)
 
                     discardText = (regex != '' and re.search(regex,btText) != None)
+
+
                     discardOCR = False if not removeOCR else \
                         any(len(kid[1])>0 and (kid[0],kid[1][0]) == ('Tr','3') for kid in leaf[2])