Skip to content

Commit

Permalink
Add PDF.path & fix .to_image() for zipped files
Browse files Browse the repository at this point in the history
These changes are related: the previous approach of using
`PDF.stream.name` to distinguish PDFs opened from a filesystem path from
PDFs opened from file-like objects doesn't work for zipped files (or,
possibly, for other file-like objects that have a `name` property).
  • Loading branch information
jsvine committed Jul 29, 2023
1 parent 7af3d0f commit 30a52cb
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
13 changes: 9 additions & 4 deletions pdfplumber/display.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pathlib
from io import BufferedReader, BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union

import PIL.Image
Expand Down Expand Up @@ -35,14 +35,18 @@ class COLORS:

def get_page_image(
stream: Union[BufferedReader, BytesIO],
path: Optional[pathlib.Path],
page_ix: int,
resolution: Union[int, float],
password: Optional[str],
antialias: bool = False,
) -> PIL.Image.Image:

src: Union[pathlib.Path, BufferedReader, BytesIO]

# If we are working with a file object saved to disk
if hasattr(stream, "name"):
src = stream.name
if path:
src = path

# If we instead are working with a BytesIO stream
else:
Expand Down Expand Up @@ -79,6 +83,7 @@ def __init__(
if original is None:
self.original = get_page_image(
stream=page.pdf.stream,
path=page.pdf.path,
page_ix=page.page_number - 1,
resolution=resolution,
antialias=antialias,
Expand Down Expand Up @@ -133,7 +138,7 @@ def reset(self) -> "PageImage":

def save(
self,
dest: Union[str, Path, BytesIO],
dest: Union[str, pathlib.Path, BytesIO],
format: str = "PNG",
quantize: bool = True,
colors: int = 256,
Expand Down
11 changes: 10 additions & 1 deletion pdfplumber/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ def __init__(
self,
stream: Union[BufferedReader, BytesIO],
stream_is_external: bool = False,
path: Optional[pathlib.Path] = None,
pages: Optional[Union[List[int], Tuple[int]]] = None,
laparams: Optional[Dict[str, Any]] = None,
password: Optional[str] = None,
strict_metadata: bool = False,
):
self.stream = stream
self.stream_is_external = stream_is_external
self.path = path
self.pages_to_parse = pages
self.laparams = None if laparams is None else LAParams(**laparams)
self.password = password
Expand Down Expand Up @@ -70,20 +72,27 @@ def open(
repair: bool = False,
) -> "PDF":

stream: Union[str, pathlib.Path, BufferedReader, BytesIO]
stream: Union[BufferedReader, BytesIO]

if repair:
stream = _repair(path_or_fp, password=password)
stream_is_external = False
# Although the original file has a path,
# the repaired version does not
path = None
elif isinstance(path_or_fp, (str, pathlib.Path)):
stream = open(path_or_fp, "rb")
stream_is_external = False
path = pathlib.Path(path_or_fp)
else:
stream = path_or_fp
stream_is_external = True
path = None

try:
return cls(
stream,
path=path,
pages=pages,
laparams=laparams,
password=password,
Expand Down
Binary file added tests/pdfs/issue-948.zip
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/test_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import os
import unittest
from zipfile import ZipFile

import PIL.Image
import pytest
Expand Down Expand Up @@ -112,3 +113,13 @@ def test_password(self):
path = os.path.join(HERE, "pdfs/password-example.pdf")
with pdfplumber.open(path, password="test") as pdf:
pdf.pages[0].to_image()

def test_zip(self):
    """Render a page image from a PDF opened as a member of a zip archive.

    Regression test for https://github.com/jsvine/pdfplumber/issues/948
    (reproducer.py).
    """
    archive_path = os.path.join(HERE, "pdfs/issue-948.zip")
    with ZipFile(archive_path) as archive:
        with archive.open("dummy.pdf") as member:
            with pdfplumber.open(member) as doc:
                # The file object handed to pdfplumber comes from the
                # archive rather than from a path passed to open().
                doc.pages[0].to_image()

0 comments on commit 30a52cb

Please sign in to comment.