Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for pathlib.Path. Fix #170 #175

Merged
merged 9 commits into from
Mar 22, 2018
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def read(fname):
tests_require = [
'mock',
'moto==0.4.31',
'pathlib2',
'responses',
]

Expand Down
16 changes: 16 additions & 0 deletions smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,20 @@
import subprocess
import sys
import requests
import importlib
import io
import warnings

# Import ``pathlib`` if the builtin ``pathlib`` or the backport ``pathlib2`` are
# available. The builtin ``pathlib`` will be imported with higher precedence.
for pathlib_module in ('pathlib', 'pathlib2'):
try:
pathlib = importlib.import_module(pathlib_module)
PATHLIB_SUPPORT = True
break
except ImportError:
PATHLIB_SUPPORT = False

from boto.compat import BytesIO, urlsplit, six
import boto.s3.connection
import boto.s3.key
Expand Down Expand Up @@ -102,6 +113,7 @@ def smart_open(uri, mode="rb", **kw):
3. a URI for Amazon's S3 (can also supply credentials inside the URI):
`s3://my_bucket/lines.txt`, `s3://my_aws_key_id:key_secret@my_bucket/lines.txt`
4. an instance of the boto.s3.key.Key class.
5. an instance of the pathlib.Path class.

Examples::

Expand Down Expand Up @@ -163,6 +175,10 @@ def smart_open(uri, mode="rb", **kw):
if not isinstance(mode, six.string_types):
raise TypeError('mode should be a string')

# Support opening ``pathlib.Path`` objects by casting them to strings.
if PATHLIB_SUPPORT and isinstance(uri, pathlib.Path):
uri = str(uri)

if isinstance(uri, six.string_types):
# this method just routes the request to classes handling the specific storage
# schemes, depending on the URI protocol in `uri`
Expand Down
14 changes: 14 additions & 0 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,20 @@ def test_open_with_keywords_explicit_r(self):
actual = fin.read()
self.assertEqual(expected, actual)

@unittest.skipUnless(
smart_open_lib.PATHLIB_SUPPORT,
"do not test pathlib support if pathlib or backport are not available")
def test_open_and_read_pathlib_path(self):
"""If ``pathlib.Path`` is available we should be able to open and read."""
from smart_open.smart_open_lib import pathlib

fpath = os.path.join(CURR_DIR, 'test_data/cp852.tsv.txt')
with open(fpath, 'rb') as fin:
expected = fin.read().decode('cp852')
with smart_open.smart_open(pathlib.Path(fpath), mode='r', encoding='cp852') as fin:
actual = fin.read()
self.assertEqual(expected, actual)

@mock_s3
def test_read_never_returns_none(self):
"""read should never return None."""
Expand Down