Skip to content

Commit

Permalink
Merge pull request #42 from jsvine/issue-41
Browse files: browse the repository at this point in the history
Fix issue #41 and bump to v0.5.6
  • Loading branch information
jsvine authored Nov 22, 2017
2 parents 731faf4 + fd25a6a commit 6afad61
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 13 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-# PDFPlumber `v0.5.5`
+# PDFPlumber `v0.5.6`

Plumb a PDF for detailed information about each text character, rectangle, and line. Plus: Table extraction and visual debugging.

Expand Down
2 changes: 1 addition & 1 deletion pdfplumber/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
version_info = (0, 5, 5)
version_info = (0, 5, 6)
__version__ = '.'.join(map(str, version_info))
27 changes: 16 additions & 11 deletions pdfplumber/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .container import Container
from copy import copy

from pdfminer.pdftypes import resolve_all
from six import string_types
import re
lt_pat = re.compile(r"^LT")
Expand All @@ -19,22 +20,26 @@ def __init__(self, pdf, page_obj, page_number=None, initial_doctop=0):
self.page_obj.rotate = self.rotation
self.initial_doctop = self.decimalize(initial_doctop)

cropbox = page_obj.attrs.get("CropBox", page_obj.attrs.get("MediaBox"))
self.cropbox = self.decimalize(cropbox)
cropbox = page_obj.attrs.get("CropBox")
mediabox = page_obj.attrs.get("MediaBox")

self.cropbox = self.decimalize(resolve_all(cropbox)) if cropbox is not None else None
self.mediabox = self.decimalize(resolve_all(mediabox) or self.cropbox)
m = self.mediabox

if self.rotation in [ 90, 270 ]:
self.bbox = self.decimalize((
min(cropbox[1], cropbox[3]),
min(cropbox[0], cropbox[2]),
max(cropbox[1], cropbox[3]),
max(cropbox[0], cropbox[2]),
min(m[1], m[3]),
min(m[0], m[2]),
max(m[1], m[3]),
max(m[0], m[2]),
))
else:
self.bbox = self.decimalize((
min(cropbox[0], cropbox[2]),
min(cropbox[1], cropbox[3]),
max(cropbox[0], cropbox[2]),
max(cropbox[1], cropbox[3]),
min(m[0], m[2]),
min(m[1], m[3]),
max(m[0], m[2]),
max(m[1], m[3]),
))

def decimalize(self, x):
Expand Down Expand Up @@ -92,7 +97,7 @@ def point2coord(pt):
]

def process_object(obj):
attr = dict((k, (v if k in NON_DECIMALIZE else d(v)))
attr = dict((k, (v if (k in NON_DECIMALIZE or v == None) else d(v)))
for k, v in obj.__dict__.items()
if k not in IGNORE)

Expand Down

0 comments on commit 6afad61

Please sign in to comment.