Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow a pathlib.PurePath object as an input to open_filename #492

Merged
merged 11 commits into from
Sep 17, 2020
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]

### Added
- Support for `pathlib.PurePath` in `open_filename` ([#491](https://github.com/pdfminer/pdfminer.six/issues/491))

### Fixed
- Pass caching parameter to PDFResourceManager in `high_level` functions ([#475](https://github.com/pdfminer/pdfminer.six/pull/475))

Expand Down
11 changes: 9 additions & 2 deletions pdfminer/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Miscellaneous Routines.
"""
import io
import pathlib
import struct
from html import escape

Expand All @@ -13,16 +15,21 @@

class open_filename(object):
"""
Context manager that allows opening a filename and closes it on exit,
Context manager that allows opening a filename
(str or pathlib.PurePath type is supported) and closes it on exit,
(just like `open`), but does nothing for file-like objects.
pietermarsman marked this conversation as resolved.
Show resolved Hide resolved
"""
def __init__(self, filename, *args, **kwargs):
if isinstance(filename, pathlib.PurePath):
filename = str(filename)
if isinstance(filename, str):
self.file_handler = open(filename, *args, **kwargs)
self.closing = True
pietermarsman marked this conversation as resolved.
Show resolved Hide resolved
else:
elif isinstance(filename, io.IOBase):
self.file_handler = filename
self.closing = False
else:
raise TypeError('Unsupported input type: %s' % type(filename))

def __enter__(self):
return self.file_handler
Expand Down
27 changes: 25 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,30 @@
from nose.tools import assert_equal
from nose.tools import assert_equal, assert_raises
import pathlib

from helpers import absolute_sample_path
from pdfminer.layout import LTComponent
from pdfminer.utils import Plane, shorten_str
from pdfminer.utils import open_filename, Plane, shorten_str


class TestOpenFilename:
    """Check which input types ``open_filename`` accepts and how it treats them."""

    def test_string_input(self):
        """A ``str`` path is opened by ``open_filename``, which then owns the handle."""
        filename = absolute_sample_path("simple1.pdf")
        opened = open_filename(filename)
        try:
            assert_equal(opened.closing, True)
        finally:
            # Constructing open_filename already opened the file; close it
            # explicitly so the test does not leak a file handle.
            opened.file_handler.close()

    def test_pathlib_input(self):
        """A ``pathlib`` path is handled the same way as a ``str`` path."""
        filename = pathlib.Path(absolute_sample_path("simple1.pdf"))
        opened = open_filename(filename)
        try:
            assert_equal(opened.closing, True)
        finally:
            # Same as above: the handle was opened in the constructor.
            opened.file_handler.close()

    def test_file_input(self):
        """An already-open file object is passed through and not owned."""
        filename = absolute_sample_path("simple1.pdf")
        with open(filename, "rb") as in_file:
            opened = open_filename(in_file)
            assert_equal(opened.file_handler, in_file)
            # The caller opened the file, so open_filename must not close it.
            assert_equal(opened.closing, False)

    def test_unsupported_input(self):
        """Anything that is neither a path nor a file object raises ``TypeError``."""
        assert_raises(TypeError, open_filename, 0)


class TestPlane:
Expand Down