-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathviewer.py
1893 lines (1651 loc) · 78.4 KB
/
viewer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Name: viewer.py
# Package: wx.lib.pdfviewer
#
# Purpose: A PDF report viewer class
#
# Author: David Hughes dfh@forestfield.co.uk
# Copyright: Forestfield Software Ltd
# Licence: Same as wxPython host
#
# History: Created 17 Jun 2009
#
# Chris Johnson May 2022
# Now accepts images which are NOT compressed and therefore
# without /Filter parameter.
# Images with /ICCBased colour now fall back to appropriate
# /DeviceXXX model (including /Indexed)
#
# Chris Johnson October 2021
# Do command improvements (insert of Xobject)
# Text handling with improved PyPDF2
# Chris Johnson (johns1c@btinternet.com)
# Significant additions but some require modded pypdf2
# rolled in 3.7 changes to remove wx.NewID
#
# Michael Hipp 08 Oct 2011 ( michael@redmule.com )
# Added prompt, printer_name, orientation options to
# pdfViewer.Print(). Added option to pdfViewer.LoadFile() to
# accept a file-like object as well as a path string
# 17 Jun 2009
# Original
#
# Tags: phoenix-port, documented, unittest
#----------------------------------------------------------------------------
"""
This module provides the :class:`~wx.lib.pdfviewer.viewer.pdfViewer` to view PDF
files.
"""
import sys
import os
import time
import types
import copy
import shutil
from six import BytesIO, string_types
import wx
VERBOSE = True
try:
# see http://pythonhosted.org/PyMuPDF - documentation & installation
import fitz
mupdf = True
if VERBOSE: print('pdfviewer using PyMuPDF (GPL)')
except ImportError:
mupdf = False
try:
# see http://pythonhosted.org/PyPDF2
import PyPDF2
from PyPDF2 import PdfFileReader
try:
from PyPDF2.pdf import PageObject
except ImportError :
from PyPDF2._page import PageObject
try:
from PyPDF2.pdf import ContentStream
except ImportError:
from PyPDF2.generic import ContentStream
from PyPDF2.filters import ASCII85Decode, FlateDecode , LZWDecode, CCITTFaxDecode
from PyPDF2.toUnicode import FetchFontExtended , as_text
from PyPDF2.utils import glyph2unicode
if VERBOSE: print('pdfviewer using PyPDF2')
except ImportError:
msg = "PyMuPDF or PyPDF2 must be available to use pdfviewer"
raise ImportError(msg)
GraphicsContext = wx.GraphicsContext
have_cairo = False
if not mupdf:
try:
import wx.lib.wxcairo as wxcairo
import cairo
from wx.lib.graphics import GraphicsContext
have_cairo = True
if VERBOSE: print('pdfviewer using Cairo')
except ImportError:
if VERBOSE: print('pdfviewer using wx.GraphicsContext')
# New PageObject method added by Forestfield Software
def extractOperators(self):
    """
    Locate and return all commands in the order they
    occur in the content stream.

    :return: list of ``(operands, operator)`` entries. An operator supplied
        as ``bytes`` is decoded to ``str``; any other entry is passed through
        unchanged. An empty list is returned when the page has no
        ``/Contents`` entry or when that entry cannot be resolved.
    """
    ops = []
    if "/Contents" not in self:
        print("Page has no content")
        return ops
    try:
        content = self["/Contents"].getObject()
    except Exception as err:
        # Report the problem and treat the page as empty instead of dropping
        # into an interactive debugger from library code.
        print('Page contents could not be read: {}'.format(err))
        return ops
    if not isinstance(content, ContentStream):
        content = ContentStream(content, self.pdf)
    for op in content.operations:
        # Only bytes operators need decoding; str operators have no .decode()
        if isinstance(op[1], bytes):
            op = (op[0], op[1].decode())
        ops.append(op)
    return ops
# Inject this method into the PageObject class
PageObject.extractOperators = extractOperators
# If reportlab is installed, use its stringWidth metric. For justifying text,
# where widths are cumulative, dc.GetTextExtent consistently underestimates,
# possibly because it returns integer rather than float.
try:
from reportlab.pdfbase.pdfmetrics import stringWidth
have_rlwidth = True
if VERBOSE: print('pdfviewer using reportlab stringWidth function')
except ImportError:
have_rlwidth = False
#----------------------------------------------------------------------------
class pdfViewer(wx.ScrolledWindow):
"""
View pdf file in a scrolled window. Contents are read from PDF file
and rendered in a GraphicsContext. Show visible window contents
as quickly as possible then, when using pyPDF, read the whole file and build
the set of drawing commands for each page. This can take time for a big file or if
there are complex drawings eg. ReportLab's colour shading inside charts and a
progress bar can be displayed by setting self.ShowLoadProgress = True (default)
"""
def __init__(self, parent, nid, pos, size, style):
    """
    Build the scrolled window and initialise the viewer state.

    :param wx.Window `parent`: parent window. Must not be ``None``;
    :param integer `nid`: window identifier. A value of -1 indicates a default value;
    :param `pos`: the control position. A value of (-1, -1) indicates a default position,
     chosen by either the windowing system or wxPython, depending on platform;
    :type `pos`: tuple or :class:`wx.Point`
    :param `size`: the control size. A value of (-1, -1) indicates a default size,
     chosen by either the windowing system or wxPython, depending on platform;
    :type `size`: tuple or :class:`wx.Size`
    :param integer `style`: window style flags; ``wx.NO_FULL_REPAINT_ON_RESIZE``
     is always added to them.
    """
    print( 'starting viewer ...' )
    window_style = style | wx.NO_FULL_REPAINT_ON_RESIZE
    wx.ScrolledWindow.__init__(self, parent, nid, pos, size, window_style)
    self.SetBackgroundStyle(wx.BG_STYLE_CUSTOM)    # recommended in wxWidgets docs

    # reference to the button panel is set by the common parent of both
    self.buttonpanel = None
    # the load progress dialog only applies to the PyPDF2 code path
    self._showLoadProgress = (not mupdf)

    handlers = (
        (wx.EVT_PAINT, self.OnPaint),
        (wx.EVT_SIZE, self.OnResize),
        (wx.EVT_SCROLLWIN, self.OnScroll),
        (wx.EVT_IDLE, self.OnIdle),
    )
    for event_type, handler in handlers:
        self.Bind(event_type, handler)

    # document state
    self.have_file = False
    self.resizing = False
    self.numpages = None

    # display settings
    self.zoomscale = -1       # fit page to screen width
    self.nom_page_gap = 20    # nominal inter-page gap (points)
    self.scrollrate = 20      # pixels per scrollbar increment

    # buffer bookkeeping
    self.page_buffer_valid = False
    self.page_after_zoom_change = None

    self.ClearBackground()
def OnIdle(self, event):
    """
    Idle handler: when a resize has been flagged, invalidate the page
    buffer, render again and clear the flag. The event is always skipped.
    """
    if not self.resizing:
        event.Skip()
        return
    self.page_buffer_valid = False
    self.Render()
    self.resizing = False
    event.Skip()
def OnResize(self, event):
    """
    Size handler: only flag that the client area changed. The redraw itself
    is done by :meth:`OnIdle`, which tests this flag.
    """
    setattr(self, 'resizing', True)
    event.Skip()
def OnScroll(self, event):
    """
    Scroll handler: schedule a recalculation and redraw of the visible
    area. The render is deferred with CallAfter, which is *essential*
    for coordination, and the event is skipped.
    """
    render_later = self.Render
    wx.CallAfter(render_later)
    event.Skip()
def OnPaint(self, event):
    """
    Paint handler: clear the window, then copy the visible part of the
    page buffer to it once a buffer DC exists.
    """
    dc = wx.PaintDC(self)
    dc.Clear()    # in case buffer now smaller than visible window
    if not hasattr(self, 'pdc'):
        # nothing has been rendered yet
        return
    dc.Blit(0, 0, self.winwidth, self.winheight,
            self.pdc, self.xshift, self.yshift)
#----------------------------------------------------------------------------
# This section defines the externally callable methods:
# LoadFile, Save, Print, SetZoom, and GoPage
# also the getter and setter for ShowLoadProgress
# that is only applicable if using PyPDF2
def LoadFile(self, pdf_file):
    """
    Read pdf file. Assume all pages are same size, for now.

    The pages that are initially visible are drawn first; drawing of the
    full page range is then scheduled with ``wx.CallAfter``.

    :param `pdf_file`: can be either a string holding
     a filename path or a file-like object.
    """
    print( 'viewer LoadFile loading {} ...'.format(pdf_file) )

    def create_fileobject(filename):
        """
        Create and return a file object with the contents of filename,
        only used for testing.
        """
        # close the source file as soon as its contents are in memory
        with open(filename, 'rb') as f:
            return BytesIO(f.read())

    self.pdfpathname = ''
    if isinstance(pdf_file, string_types):
        # a filename/path string, save its name
        self.pdfpathname = pdf_file
        # remove comment from next line to test using a file-like object
        # pdf_file = create_fileobject(pdf_file)

    # module-level list, reset for every newly loaded file
    global missing_fonts
    missing_fonts = []

    if mupdf:
        self.pdfdoc = mupdfProcessor(self, pdf_file)
    else:
        self.pdfdoc = pypdfProcessor(self, pdf_file, self.ShowLoadProgress)

    self.numpages = self.pdfdoc.numpages
    self.pagewidth = self.pdfdoc.pagewidth
    self.pageheight = self.pdfdoc.pageheight
    self.page_buffer_valid = False
    self.Scroll(0, 0)               # in case this is a re-LoadFile
    self.CalculateDimensions()      # to get initial visible page range
    # draw and display the minimal set of pages
    self.pdfdoc.DrawFile(self.frompage, self.topage)
    self.have_file = True
    # now draw full set of pages
    wx.CallAfter(self.pdfdoc.DrawFile, 0, self.numpages-1)
def Save(self):
    """
    Save a copy of the pdf file if it was originally named.

    Nothing happens when the document was not loaded from a path. Otherwise
    a save dialog is shown and, on confirmation, the original file is
    copied to the chosen path.
    """
    if not self.pdfpathname:
        return
    wild = "Portable document format (*.pdf)|*.pdf"
    dialog_style = wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT
    dlg = wx.FileDialog(self, message="Save file as ...",
                        wildcard=wild, style=dialog_style)
    confirmed = dlg.ShowModal() == wx.ID_OK
    if confirmed:
        target = dlg.GetPath()
        shutil.copy(self.pdfpathname, target)
    dlg.Destroy()
def Print(self, prompt=True, printer_name=None, orientation=None):
    """
    Print the pdf.

    :param boolean `prompt`: show the print dialog to the user (True/False). If
     False, the print dialog will not be shown and the pdf will be printed
     immediately. Default: True.
    :param string `printer_name`: the name of the printer that is to
     receive the printout. Default: as set by the O/S.
    :param `orientation`: select the orientation (:class:`wx.PORTRAIT` or
     :class:`wx.LANDSCAPE`) for the printout. Default: as set by the O/S.
    """
    # the whole document is offered as the printable page range
    last_page = self.numpages
    pdd = wx.PrintDialogData()
    pdd.SetMinPage(1)
    pdd.SetFromPage(1)
    pdd.SetMaxPage(last_page)
    pdd.SetToPage(last_page)

    pdata = pdd.GetPrintData()
    if printer_name:
        pdata.SetPrinterName(printer_name)
    if orientation:
        pdata.SetOrientation(orientation)
    # PrintData does not return actual PrintQuality - it can't as printer_name not known
    # but it defaults to wx.PRINT_QUALITY_HIGH, overriding user's own setting for the
    # printer. However calling SetQuality with a value of 0 seems to leave the printer
    # setting untouched
    pdata.SetQuality(0)

    printer = wx.Printer(pdd)
    printout = pdfPrintout('', self)
    succeeded = printer.Print(self, printout, prompt=prompt)
    if not succeeded and printer.GetLastError() == wx.PRINTER_ERROR:
        dlg = wx.MessageDialog(self, 'Unable to perform printing',
                               'Printer' , wx.OK | wx.ICON_INFORMATION)
        dlg.ShowModal()
        dlg.Destroy()
    printout.Destroy()
def SetZoom(self, zoomscale):
    """
    Change the zoom scale and redisplay the current page(s) at the new size.

    A positive integer or floating zoom scale renders the file at the
    corresponding size, where 1.0 is "actual" point size (1/72").
    -1 fits page width and -2 fits page height into client area.

    :param `zoomscale`: an integer or float
    """
    current_page = self.frompage
    self.zoomscale = zoomscale
    self.page_buffer_valid = False
    # GoPage below triggers rendering at the new size, but it computes the page
    # location from the old zoom scale. Remember the wanted page so that Render
    # can call GoPage again *after* rendering at the new size.
    self.page_after_zoom_change = current_page
    self.GoPage(current_page)
def GoPage(self, pagenum):
    """
    Go to page

    :param integer `pagenum`: go to the provided page number if it is valid,
     otherwise scroll back to the top of the document.
    """
    units_per_step = self.GetScrollPixelsPerUnit()[1]
    if 0 < pagenum <= self.numpages and units_per_step > 0:
        # Scroll positions are whole scroll units, so use integer division;
        # true division would hand a float to Scroll under Python 3.
        target = int(pagenum * self.Ypagepixels // units_per_step) + 1
        self.Scroll(0, target)
    else:
        # invalid page number, or vertical scrolling is not configured yet
        self.Scroll(0, 0)
    # calling Scroll sometimes doesn't raise wx.EVT_SCROLLWIN eg Windows 8 64 bit - so
    wx.CallAfter(self.Render)
@property
def ShowLoadProgress(self):
    """
    Whether progress is displayed while a file is being read (PyPDF2 only).
    """
    show = self._showLoadProgress
    return show

@ShowLoadProgress.setter
def ShowLoadProgress(self, flag):
    """Store the new value of the load progress setting."""
    setattr(self, '_showLoadProgress', flag)
#----------------------------------------------------------------------------
# This section is concerned with rendering a sub-set of drawing commands on demand
def CalculateDimensions(self):
    """
    Compute the required buffer sizes to hold the viewed rectangle and
    the range of pages visible. Set self.page_buffer_valid = False if
    the current set of rendered pages changes.

    Returns early, after resetting ``frompage``/``topage`` to 0 and setting
    the font scale factors and window size, when the client area is less
    than 100 pixels high.
    """
    self.frompage = 0
    self.topage = 0
    device_scale = wx.ClientDC(self).GetPPI()[0]/72.0   # pixels per inch/points per inch
    assert device_scale > 0
    self.font_scale_metrics = 1.0
    self.font_scale_size = 1.0
    # for Windows only with wx.GraphicsContext the rendered font size is too big
    # in the ratio of screen pixels per inch to points per inch
    # and font metrics are too big in the same ratio for both for Cairo and wx.GC
    if wx.PlatformInfo[1] == 'wxMSW':
        self.font_scale_metrics = 1.0 / device_scale
        if not have_cairo:
            self.font_scale_size = 1.0 / device_scale

    self.winwidth, self.winheight = self.GetClientSize()
    if self.winheight < 100:
        print( f'window height {self.winheight} too small to display ? ' )
        return

    self.Ypage = self.pageheight + self.nom_page_gap
    if self.zoomscale > 0.0:
        self.scale = self.zoomscale * device_scale
    elif int(self.zoomscale) == -1:     # fit width
        self.scale = self.winwidth / self.pagewidth
    else:                               # fit page
        self.scale = self.winheight / self.pageheight
    if self.scale == 0.0:   # this could happen if the window was not yet initialized
        self.scale = 1.0

    self.Xpagepixels = int(round(self.pagewidth*self.scale))
    self.Ypagepixels = int(round(self.Ypage*self.scale))

    # adjust inter-page gap so Ypagepixels is a whole number of scroll increments
    # and page numbers change precisely on a scroll click.
    # Integer division is required here: with true division nlo equals
    # Ypagepixels, so no snapping happens and Ypagepixels turns into a float.
    idiv = self.Ypagepixels // self.scrollrate
    nlo = idiv * self.scrollrate
    nhi = nlo + self.scrollrate
    if nhi - self.Ypagepixels < self.Ypagepixels - nlo:
        self.Ypagepixels = nhi
    else:
        self.Ypagepixels = nlo
    # never snap down to zero, Ypagepixels is used as a divisor below
    self.Ypagepixels = max(self.Ypagepixels, self.scrollrate)
    self.page_gap = self.Ypagepixels/self.scale - self.pageheight

    self.maxwidth = max(self.winwidth, self.Xpagepixels)
    self.maxheight = max(self.winheight, self.numpages*self.Ypagepixels)
    self.SetVirtualSize((self.maxwidth, self.maxheight))
    self.SetScrollRate(self.scrollrate, self.scrollrate)

    xv, yv = self.GetViewStart()
    dx, dy = self.GetScrollPixelsPerUnit()
    self.x0, self.y0 = (xv * dx, yv * dy)
    self.frompage = int(min(self.y0/self.Ypagepixels, self.numpages-1))
    self.topage = int(min((self.y0+self.winheight-1)/self.Ypagepixels, self.numpages-1))
    self.pagebufferwidth = max(self.Xpagepixels, self.winwidth)
    self.pagebufferheight = (self.topage - self.frompage + 1) * self.Ypagepixels

    # Inform buttonpanel controls of any changes
    if self.buttonpanel:
        self.buttonpanel.Update(self.frompage, self.numpages,
                                self.scale/device_scale)

    self.page_y0 = self.frompage * self.Ypagepixels
    self.page_x0 = 0
    self.xshift = self.x0 - self.page_x0
    self.yshift = self.y0 - self.page_y0

    # When the buffer is still valid the whole visible area lies inside it,
    # unless the range of visible pages has changed.
    if self.page_buffer_valid and (self.frompage != self.cur_frompage or
                                   self.topage != self.cur_topage):
        self.page_buffer_valid = False    # due to page buffer change
    self.cur_frompage = self.frompage
    self.cur_topage = self.topage
def Render(self):
    """
    Recalculate dimensions as client area may have been scrolled or resized.
    The smallest unit of rendering that can be done is the pdf page. So render
    the drawing commands for the pages in the visible rectangle into a buffer
    big enough to hold this set of pages. Force re-creating the page buffer
    only when client view moves outside it.
    With PyPDF2, use gc.Translate to render each page wrt the pdf origin,
    which is at the bottom left corner of the page.

    Does nothing until a file has been loaded.
    """
    if not self.have_file:
        return
    self.CalculateDimensions()
    if not self.page_buffer_valid:
        # Initialize the buffer bitmap. The sizes are forced to int because
        # the bitmap constructor takes whole pixel dimensions.
        self.pagebuffer = wx.Bitmap(int(self.pagebufferwidth),
                                    int(self.pagebufferheight))
        self.pdc = wx.MemoryDC(self.pagebuffer)     # must persist
        gc = GraphicsContext.Create(self.pdc)       # Cairo/wx.GraphicsContext API

        # white background
        path = gc.CreatePath()
        path.AddRectangle(0, 0,
                          self.pagebuffer.GetWidth(), self.pagebuffer.GetHeight())
        gc.SetBrush(wx.WHITE_BRUSH)
        gc.FillPath(path)

        for pageno in range(self.frompage, self.topage+1):
            self.xpageoffset = 0 - self.x0
            self.ypageoffset = pageno*self.Ypagepixels - self.page_y0
            gc.PushState()
            if mupdf:
                gc.Translate(self.xpageoffset, self.ypageoffset)
                # scaling is done inside RenderPage
            else:
                gc.Translate(self.xpageoffset, self.ypageoffset +
                             self.pageheight*self.scale)
                gc.Scale(self.scale, self.scale)
            self.pdfdoc.RenderPage(gc, pageno, scale=self.scale)
            # Show inter-page gap
            gc.SetBrush(wx.Brush(wx.Colour(180, 180, 180)))    # mid grey
            gc.SetPen(wx.TRANSPARENT_PEN)
            if mupdf:
                gc.DrawRectangle(0, self.pageheight*self.scale,
                                 self.pagewidth*self.scale, self.page_gap*self.scale)
            else:
                gc.DrawRectangle(0, 0, self.pagewidth, self.page_gap)
            gc.PopState()

        gc.PushState()
        gc.Translate(0-self.x0, 0-self.page_y0)
        self.RenderPageBoundaries(gc)
        gc.PopState()

        self.page_buffer_valid = True

    self.Refresh(0)   # Blit appropriate area of new or existing page buffer to screen

    # ensure we stay on the same page after zoom scale is changed; compare with
    # None so that a pending page 0 is also re-applied and then cleared
    if self.page_after_zoom_change is not None:
        self.GoPage(self.page_after_zoom_change)
        self.page_after_zoom_change = None
def RenderPageBoundaries(self, gc):
    """
    Fill the parts of the window not covered by pages with mid grey:
    a strip to the right of the page when the window is wider than the
    page, and a strip at the bottom when the window extends below the
    last page.
    """
    grey = wx.Colour(180, 180, 180)    # mid grey
    gc.SetBrush(wx.Brush(grey))
    gc.SetPen(wx.TRANSPARENT_PEN)
    gc.Scale(1.0, 1.0)

    spare_width = self.winwidth - self.Xpagepixels
    if spare_width > 0:
        gc.DrawRectangle(self.winwidth-spare_width, 0, spare_width, self.maxheight)

    document_bottom = self.numpages*self.Ypagepixels - self.y0
    spare_height = self.winheight - document_bottom
    if spare_height > 0:
        gc.DrawRectangle(0, self.winheight-spare_height, self.maxwidth, spare_height)
#============================================================================
class mupdfProcessor(object):
    """
    Create an instance of this class to open a PDF file, process the contents of
    each page and render each one on demand using the GPL mupdf library, which is
    accessed via the python-fitz package bindings (version 1.9.1 or later)
    """
    def __init__(self, parent, pdf_file):
        """
        Open the document and record its page count and first-page size.

        :param `parent`: the viewer window; its ``GoPage`` method is called
         from :meth:`DrawFile` and it is the parent of error dialogs.
        :param `pdf_file`: a File object or an object that supports the standard
         read and seek methods similar to a File object.
         Could also be a string representing a path to a PDF file.
        """
        self.parent = parent
        if isinstance(pdf_file, string_types):
            # a filename/path string, pass the name to fitz.open
            pathname = pdf_file
            self.pdfdoc = fitz.open(pathname)
        else:
            # assume it is a file-like object, pass the stream content to fitz.open
            # and a '.pdf' extension in pathname to identify the stream type
            pathname = 'fileobject.pdf'
            if pdf_file.tell() > 0:     # not positioned at start
                pdf_file.seek(0)
            stream = bytearray(pdf_file.read())
            self.pdfdoc = fitz.open(pathname, stream)

        self.numpages = self.pdfdoc.pageCount
        self.page = self.pdfdoc.loadPage(0)
        self.current_object = self.page
        # The first page supplies the dimensions. It has to be reached through
        # self.page: there is no local variable named page in this method.
        self.page_rect = self.page.bound()
        self.pagewidth = self.page_rect.width
        self.pageheight = self.page_rect.height
        self.zoom_error = False     # set if memory errors during render

    def DrawFile(self, frompage, topage):
        """
        This is a no-op for mupdf. Each page is scaled and drawn on
        demand during RenderPage directly via a call to page.getPixmap().
        Only the viewer is asked to go to `frompage`.
        """
        self.parent.GoPage(frompage)

    def RenderPage(self, gc, pageno, scale=1.0):
        """
        Render the page as a bitmap into gc at the given scale.

        :param `gc`: graphics context that receives the bitmap at (0, 0)
        :param integer `pageno`: page number passed to ``loadPage``
        :param float `scale`: scale factor applied to both axes

        A ``RuntimeError`` or ``MemoryError`` raised while rendering is
        reported in a message dialog, once per run of consecutive failures.
        """
        page = self.pdfdoc.loadPage(pageno)
        matrix = fitz.Matrix(scale, scale)
        try:
            pix = page.getPixmap(matrix=matrix)   # MUST be keyword arg(s)
            # the buffer format handed to wx depends on the fitz version
            if [int(v) for v in fitz.version[1].split('.')] >= [1,15,0]:
                bmp = wx.Bitmap.FromBuffer(pix.width, pix.height, pix.samples)
            else:
                bmp = wx.Bitmap.FromBufferRGBA(pix.width, pix.height, pix.samples)
            gc.DrawBitmap(bmp, 0, 0, pix.width, pix.height)
            self.zoom_error = False
        except (RuntimeError, MemoryError):
            if not self.zoom_error:     # report once only
                self.zoom_error = True
                dlg = wx.MessageDialog(self.parent, 'Out of memory. Zoom level too high?',
                                       'pdf viewer' , wx.OK |wx.ICON_EXCLAMATION)
                dlg.ShowModal()
                dlg.Destroy()
#============================================================================
class pypdfProcessor(object):
"""
Create an instance of this class to open a PDF file, process the contents of
every page using PyPDF2 then render each one on demand
"""
def __init__(self, parent, fileobj, showloadprogress):
    """
    Open the document with PyPDF2 and initialise the processing state.

    :param `parent`: the viewer window that owns this processor
    :param `fileobj`: passed unchanged to ``PdfFileReader``
    :param boolean `showloadprogress`: whether :meth:`DrawFile` may report
     progress while processing the complete page range
    """
    self.parent = parent
    self.showloadprogress = showloadprogress

    reader = PdfFileReader(fileobj)
    self.pdfdoc = reader
    self.numpages = reader.getNumPages()

    # the page size is taken from the media box of the first page
    media_box = reader.getPage(0).mediaBox
    self.pagewidth = float(media_box.getUpperRight_x())
    self.pageheight = float(media_box.getUpperRight_y())

    # drawing command lists and bookkeeping, filled while pages are processed
    self.pagedrawings = {}
    self.unimplemented = {}
    self.formdrawings = {}

    # per-page processing state
    self.page = None
    self.gstate = None
    self.saved_state = None
    self.knownfont = False
    self.progbar = None
# These methods interpret the PDF contents as a set of drawing commands
def Progress(self, ptype, value):
    """
    Drive the load progress dialog; called at intervals during DrawFile.

    The dialog is created on the first 'start' and afterwards only hidden
    and shown again rather than being destroyed (cj 2020-07).

    :param string `ptype`: one of 'start', 'progress' or 'end'; any other
     value is ignored
    :param `value`: the maximum for a first 'start', the current position
     for 'progress'; not used otherwise
    """
    if ptype == 'start':
        if self.progbar is None:
            pmsg = 'Reading pdf file'
            dialog_style = (wx.PD_AUTO_HIDE |
                            wx.PD_ESTIMATED_TIME | wx.PD_REMAINING_TIME)
            self.progbar = wx.ProgressDialog('Load file', pmsg, value, None,
                                             dialog_style)
        else:
            self.progbar.Show()
        return
    if ptype == 'progress':
        self.progbar.Update(value)
        return
    if ptype == 'end':
        self.progbar.Hide()
def DrawFile(self, frompage, topage):
    """
    Build set of drawing commands from PDF contents. Ideally these could be drawn
    straight into a PseudoDC and the visible section painted directly into
    scrolled window, but we need to be able to zoom and scale the output quickly
    without having to rebuild the drawing commands (slow). So build our
    own command lists, one per page, into self.pagedrawings.

    Progress is reported only when the whole document is processed and
    progress display was requested. The viewer is finally asked to go to
    `frompage`.
    """
    whole_file = (frompage == 0 and topage == self.numpages-1)
    report = self.showloadprogress and whole_file
    if report:
        self.Progress('start', self.numpages)

    for done, pageno in enumerate(range(frompage, topage+1), start=1):
        # state is reset with every new page
        self.gstate = pdfState()
        self.saved_state = []
        self.page = self.pdfdoc.getPage(pageno)
        self.current_object = self.page
        pdf_fonts = self.FetchFonts(self.page)
        operators = self.page.extractOperators()
        self.pagedrawings[pageno] = self.ProcessOperators(operators, pdf_fonts)
        if report:
            self.Progress('progress', done)

    if report:
        self.Progress('end', None)
    self.parent.GoPage(frompage)
def RenderPage(self, gc, pageno, scale=None):
    """
    Render the set of pagedrawings

    In a pdf file, bitmaps are treated as being of unit width and height and
    are scaled via a previous ConcatTransform containing the corresponding width
    and height as scale factors. wx.GraphicsContext/Cairo appear not to respond to
    this so scaling is removed from transform and width & height are added
    to the Drawbitmap call.

    :param `gc`: graphics context that receives the drawing commands
    :param integer `pageno`: page to render. When no drawing commands are
     stored for it, the page number is clamped to ``len(pagedrawings) - 1``;
     nothing is drawn if that page has no drawing commands either.
    :param `scale`: not used by this method
    """
    if pageno not in self.pagedrawings:
        # pagedrawings is keyed by page number, so only fall back to the
        # clamped page when the requested one is genuinely missing
        pageno = min(pageno, len(self.pagedrawings) - 1)
        if pageno not in self.pagedrawings:
            return

    drawdict = {'ConcatTransform': gc.ConcatTransform,
                'PushState': gc.PushState,
                'PopState': gc.PopState,
                'SetFont': gc.SetFont,
                'SetPen': gc.SetPen,
                'SetBrush': gc.SetBrush,
                'DrawText': gc.DrawText,
                'DrawBitmap': gc.DrawBitmap,
                'CreatePath': gc.CreatePath,
                'DrawPath': gc.DrawPath }
    pathcmds = ('MoveToPoint', 'AddLineToPoint', 'AddCurveToPoint',
                'AddRectangle', 'CloseSubpath')
    gp = None   # current graphics path, set by a 'CreatePath' command

    for drawcmd, args, kwargs in self.pagedrawings[pageno]:
        # scale font if requested by printer DC
        scale_font = drawcmd == 'SetFont' and hasattr(gc, 'font_scale')
        if scale_font:
            args[0].Scale(gc.font_scale)
        if drawcmd == 'ConcatTransform':
            cm = gc.CreateMatrix(*args, **kwargs)
            args = (cm,)
        if drawcmd == 'CreatePath':
            gp = drawdict[drawcmd](*args, **kwargs)
            continue
        elif drawcmd == 'DrawPath':
            if gp is None:
                # no path has been created, so there is nothing to draw
                continue
            args = (gp, args[1])
        if drawcmd in drawdict:
            try:    ## cjcj 2020-07
                drawdict[drawcmd](*args, **kwargs)
            except Exception:
                print( f'error with {drawcmd=} {args} {kwargs} ' )
                raise
            # reset font scaling in case RenderPage call is repeated
            if scale_font:
                args[0].Scale(1.0/gc.font_scale)
        elif drawcmd in pathcmds and gp is not None:
            # path construction commands are applied to the current path
            getattr(gp, drawcmd)(*args, **kwargs)
def FetchFonts(self, currentobject):
    """
    Return the standard fonts in current page or form

    :param `currentobject`: page or form object whose ``/Resources`` entry
     is searched for a ``/Font`` dictionary
    :return: dict mapping each font resource key to its ``/BaseFont`` name
     without the leading ``/``, else to the ``/FontName`` of its
     ``/FontDescriptor``, else to ``'No Base Font'``. The dict is empty
     when the object has no resources or no fonts.
    :raises AttributeError: when the object has a ``/Resources`` entry that
     cannot be used as expected
    """
    KEY_BaseFont = '/BaseFont'
    KEY_FontDescriptor = '/FontDescriptor'
    KEY_FontName = '/FontName'
    pdf_fonts = {}

    try:
        fonts = currentobject["/Resources"].getObject()['/Font']
    except AttributeError:
        if '/Resources' in currentobject:
            raise
        return pdf_fonts
    except (KeyError, TypeError):
        # no /Resources or no /Font entry: there are simply no fonts to report
        return pdf_fonts
    if fonts is None:
        return pdf_fonts

    try:
        keys = list(fonts)
    except TypeError:
        print( f'font resources are not iterable {fonts!r} ' )
        return pdf_fonts

    for key in keys:
        # handle each font on its own so one malformed entry does not
        # discard the fonts that follow it
        try:
            font = fonts[key]
            if KEY_BaseFont in font:
                pdf_fonts[key] = font[KEY_BaseFont][1:]   # without leading /
            elif KEY_FontDescriptor in font:
                pdf_fonts[key] = font[KEY_FontDescriptor][KEY_FontName]
            else:
                pdf_fonts[key] = 'No Base Font'
        except (KeyError, TypeError) as err:
            print( f'error getting font {key=} {err!r} ' )
    return pdf_fonts
def ProcessOperators(self, opslist, pdf_fonts):
"""
Interpret each operation in opslist and return in drawlist.
"""
drawlist = []
path = []
for operand, operator in opslist :
g = self.gstate
if isinstance( operator , bytes) :
# coerce operator to text
operator = operator.decode()
if operator == 'cm' and operand: # new transformation matrix
# some operands need inverting because directions of y axis
# in pdf and graphics context are opposite
a, b, c, d, e, f = [float(n) for n in operand]
drawlist.append(['ConcatTransform', (a, -b, -c, d, e, -f), {}])
elif operator == 'q': # save state
self.saved_state.append(copy.deepcopy(g))
drawlist.append(['PushState', (), {}])
elif operator == 'Q': # restore state
self.gstate = self.saved_state.pop()
drawlist.append(['PopState', (), {}])
elif operator == 'gs' : # state from object
gs_page_resources = self.page["/Resources"].getObject()['/ExtGState']
gs_resource = self.gstate.LoadResource( gs_page_resources[ operand[0] ] )
# colour space
elif operator == 'CSxxx': # Colour Space
pass
elif operator == 'RG': # Stroke RGB
rs, gs, bs = [int(float(n)*255) for n in operand]
g.strokeRGB = wx.Colour(rs, gs, bs)
elif operator == 'rg': # Fill RGB
rf, gf, bf = [int(float(n)*255) for n in operand]
g.fillRGB = wx.Colour(rf, gf, bf)
elif operator == 'K': # Stroke CMYK
rs, gs, bs = self.ConvertCMYK(operand)
g.strokeRGB = wx.Colour(rs, gs, bs)
elif operator == 'k': # Fill CMYK
rf, gf, bf = self.ConvertCMYK(operand)
g.fillRGB = wx.Colour(rf, gf, bf)
elif operator == 'G' : # Stroke Greyscale 0=black 1=white
rs, gs, bs = self.ConvertGrey(operand)
g.strokeRGB = wx.Colour(rs, gs, bs)
elif operator == 'g' : # Stroke Greyscale 0=black 1=white
rf, gf, bf = self.ConvertGrey(operand)
g.fillRGB = wx.Colour(rf, gf, bf)
elif operator == 'w': # Line width
g.lineWidth = max(float(operand[0]), 1.0)
elif operator == 'J': # Line cap
ix = float(operand[0])
g.lineCapStyle = {0: wx.CAP_BUTT, 1: wx.CAP_ROUND,
2: wx.CAP_PROJECTING}[ix]
elif operator == 'j': # Line join
ix = float(operand[0])
g.lineJoinStyle = {0: wx.JOIN_MITER, 1: wx.JOIN_ROUND,
2: wx.JOIN_BEVEL}[ix]
elif operator == 'd': # Line dash pattern
g.lineDashArray = [int(n) for n in operand[0]]
g.lineDashPhase = int(operand[1])
elif operator in ('m', 'c', 'l', 're', 'v', 'y', 'h'): # path defining ops
NewClippingPathRequired = False
path.append([[float(n) for n in operand], operator])
elif operator in( 'W' ,'W*' ) : # Clipping path
'''
In the middle of creating a graphics path (
After the path has been painted, the clipping path in the graphics state shall be set to
the intersection of the current clipping path and the newly constructed path.
'''
NewClippingPathRequired = True
NewClippingRule = operator
elif operator in ('b', 'B', 'b*', 'B*', 'f', 'F', 'f*',
's', 'S', 'n'): # path drawing ops
drawlist.extend(self.DrawPath(path, operator))
if NewClippingPathRequired :
drawlist.extend( self.SetClippingPath( path , NewClippingRule) )
path = []
elif operator == 'BT': # begin text object
g.textMatrix = [1, 0, 0, 1, 0, 0]
g.textLineMatrix = [1, 0, 0, 1, 0, 0]
elif operator == 'ET': # end text object
continue
elif operator == 'Tm': # text matrix
g.textMatrix = [float(n) for n in operand]
g.textLineMatrix = [float(n) for n in operand]
elif operator == 'TL': # text leading
g.leading = float(operand[0])
elif operator == 'Tc': # character spacing
g.charSpacing = float(operand[0])
elif operator == 'Tw': # word spacing
g.wordSpacing = float(operand[0])
elif operator == 'Tz': # horizontal spacing percentg
g.horizontalScaling = float(operand[0])/100
elif operator == 'Ts': # super/subscript
g.textRise = float(operand[0])
elif operator == 'Td': # next line via offsets
g.textLineMatrix[4] += float(operand[0])
g.textLineMatrix[5] += float(operand[1])
g.textMatrix = copy.copy(g.textLineMatrix)
elif operator == 'Tf': # text font
current_font_name = operand[0]
current_font, current_font_encoding = FetchFontExtended(self.page , current_font_name , Debug=False)
try:
g.font = pdf_fonts[operand[0]]
except :
print( f' issue with font operand in command {operator} {operand[0]} {operand[1]} ' )
print(pdf_fonts)
print( '----------------------------' )
raise
g.fontSize = float(operand[1])
elif operator == 'T*': # next line via leading
g.textLineMatrix[4] += 0
g.textLineMatrix[5] -= g.leading if g.leading is not None else 0
g.textMatrix = copy.copy(g.textLineMatrix)
elif operator == 'Tj': # show text
drawlist.extend(self.DrawTextString(as_text( operand[0],encoding=current_font_encoding) ))
elif operator == "'" : # equiv to T* and Tj
g.textLineMatrix[4] += 0
g.textLineMatrix[5] -= g.leading if g.leading is not None else 0
g.textMatrix = copy.copy(g.textLineMatrix)
drawlist.extend(self.DrawTextString(
as_text( operand[0],encoding=current_font_encoding) ))
elif operator == '"' : # equiv to set word spacing, set character spacing T* and Tj
g.wordSpacing = float(operand[0])
g.charSpacing = float(operand[1])
g.textLineMatrix[4] += 0
g.textLineMatrix[5] -= g.leading if g.leading is not None else 0
g.textMatrix = copy.copy(g.textLineMatrix)
drawlist.extend(self.DrawTextString(
as_text( operand[2],encoding=current_font_encoding) ))
elif operator == 'TJ' : # show text and spacing
spacing = False
for el in operand :
for e2 in el :
if isinstance(e2 , PyPDF2.generic.NumberObject ) or isinstance(e2 , PyPDF2.generic.FloatObject ) :
# move back by n/1000 text units
#g.textLineMatrix[4] -= float(e2)*0.1
#g.textMatrix = copy.copy(g.textLineMatrix)
#g.textMatrix[4] -= float(e2)*0.1
#drawlist.extend(self.DrawTextString( b'' ) )
pass
else :
try:
e2a = as_text( e2,encoding=current_font_encoding)
drawlist.extend(self.DrawTextString( e2a ) )
except :
try:
e3 = "?" * len(e2)
drawlist.extend(self.DrawTextString( e3 ) )
except:
print( "TJ with odd operand {} of type {} ".format(e2, type(e2)))
pass
if spacing:
print('PDF operator {} has spacing unimplemented (operand {})'.format(operator, operand))
elif operator == 'Do': # invoke named XObject
if VERBOSE: print( f'Do operator invoking named XObject {operand[0]} {self.page=} {self.current_object} ' )
dlist = self.InsertXObject(operand[0])
if dlist: # may be unimplemented decode
drawlist.extend(dlist)
elif operator == 'INLINE IMAGE': # special pyPdf case + operand is a dict
dlist = self.InlineImage(operand)
if dlist: # may be unimplemented decode
drawlist.extend(dlist)
else: # report once
if operator not in self.unimplemented:
if VERBOSE: print(f'PDF {operator=} is not implemented {operand=} ')
self.unimplemented[operator] = 1
# Fix bitmap transform: move the scaling from any transform matrix that immediately
# precedes a DrawBitmap operation into the op itself. The width and height extracted
# from the bitmap are the size of the original PDF image, not the size at which it is
# to be drawn. Rotation and stretching still need to be checked, as they may have
# swapped ratios.
for k in range(len(drawlist)-1):
if drawlist[k][0] == 'ConcatTransform' and drawlist[k+1][0] == 'DrawBitmap':
ctargs = list(drawlist[k][1])
bmargs = list(drawlist[k+1][1])
w = ctargs[0]
h = ctargs[3]
bmargs[2] = -ctargs[3] # y position
bmargs[3] = ctargs[0] # width
bmargs[4] = ctargs[3] # height
ctargs[0] = 1.0 #
ctargs[1] = ctargs[1] / w #
ctargs[2] = ctargs[2] / h
ctargs[3] = 1.0
drawlist[k][1] = tuple(ctargs)
drawlist[k+1][1] = tuple(bmargs)
return drawlist
def SetFont(self, pdfont, size):
    """
    Build a :class:`wx.Font` that stands in for the named PDF font.

    The lower-cased font name is searched for a base-font keyword
    (courier, helvetica, times, symbol, zapfdingbats - tested in that
    order) to pick the wx family and face name. A name containing none
    of them falls back to Arial, sets ``self.knownfont`` to False and is
    recorded once in the module-level ``missing_fonts`` list (and printed
    the first time it is seen when ``VERBOSE`` is set).

    'bold' in the name selects a bold weight; 'oblique' or 'italic'
    selects the italic style.

    :param string `pdfont`: the PDF font name
    :param `size`: the requested point size; values below 1 are raised to 1
    :returns: the matching :class:`wx.Font` instance
    """
    global missing_fonts
    self.knownfont = True
    name = pdfont.lower()

    # (keyword, wx family, face name) - order matters, first hit wins
    substitutions = (
        ('courier', wx.FONTFAMILY_MODERN, 'Courier New'),
        ('helvetica', wx.FONTFAMILY_SWISS, 'Arial'),
        ('times', wx.FONTFAMILY_ROMAN, 'Times New Roman'),
        ('symbol', wx.FONTFAMILY_DEFAULT, 'Symbol'),
        ('zapfdingbats', wx.FONTFAMILY_DEFAULT, 'Wingdings'),
    )
    match = next(
        ((fam, face_name) for keyword, fam, face_name in substitutions
         if keyword in name),
        None,
    )

    if match is not None:
        family, face = match
    else:
        # Remember (and optionally report) each unknown font only once
        if name not in missing_fonts:
            missing_fonts.append(name)
            if VERBOSE: print('Unknown font %s' % name)
        self.knownfont = False
        family = wx.FONTFAMILY_SWISS
        face = 'Arial'

    weight = wx.FONTWEIGHT_BOLD if 'bold' in name else wx.FONTWEIGHT_NORMAL
    slanted = 'oblique' in name or 'italic' in name
    style = wx.FONTSTYLE_ITALIC if slanted else wx.FONTSTYLE_NORMAL
    return wx.Font(max(1, size), family, style, weight, faceName=face)
def DrawTextString(self, text):
    """
    Build the draw-list entries for a text string using the font and
    fill colour of the current graphics state.

    Word spacing only works for horizontal text.

    :param string `text`: the text to draw
    :returns: a list starting with a 'SetFont' entry, followed by one
        entry from ``DrawTextItem`` per piece of text
    """
    state = self.gstate

    # Two independent font objects are built from the same PDF font:
    # one scaled by font_scale_metrics and handed to DrawTextItem, the
    # other scaled by font_scale_size and placed in the 'SetFont' entry.
    metrics_font = self.SetFont(state.font, state.fontSize)
    metrics_font.Scale(self.parent.font_scale_metrics)
    display_font = self.SetFont(state.font, state.fontSize)
    display_font.Scale(self.parent.font_scale_size)

    commands = [['SetFont', (display_font, state.GetFillRGBA()), {}]]

    # With positive word spacing each whitespace-separated word is
    # emitted as its own item; otherwise the string is drawn whole.
    pieces = text.split() if state.wordSpacing > 0 else [text]
    commands.extend(self.DrawTextItem(piece, metrics_font) for piece in pieces)
    return commands
def DrawTextSpace( self , adjust ) :
dlist = []
g = self.gstate
f0 = self.SetFont(g.font, g.fontSize)