From 272bdb3f26435dbcd3b366f5a9593bc8916d7b63 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 10 Oct 2021 22:22:29 +0900 Subject: [PATCH 1/4] support working directly with file descriptors --- smart_open/compression.py | 1 + smart_open/smart_open_lib.py | 11 +++++++++++ smart_open/tests/test_smart_open.py | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/smart_open/compression.py b/smart_open/compression.py index ac66d62b..fe022460 100644 --- a/smart_open/compression.py +++ b/smart_open/compression.py @@ -120,6 +120,7 @@ def compression_wrapper(file_obj, mode, compression): elif compression == INFER_FROM_EXTENSION: try: filename = file_obj.name + filename.upper() # make sure this thing is a string except (AttributeError, TypeError): logger.warning( 'unable to transparently decompress %r because it ' diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index cd80f47f..6c91b118 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -390,6 +390,17 @@ def _open_binary_stream(uri, mode, transport_params): uri.name = getattr(uri, 'name', 'unknown') return uri + if isinstance(uri, int): + # + # We're working with a file descriptor. If we open it, its name is + # just the integer value, which isn't helpful. Unfortunately, there's + # no easy cross-platform way to go from a file descriptor to the filename, + # so we just give up here. The user will have to handle their own + # compression, etc. explicitly. + # + fobj = _builtin_open(uri, mode, closefd=False) + return fobj + if not isinstance(uri, str): raise TypeError("don't know how to handle uri %s" % repr(uri)) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 5a8b67dd..9f2b881f 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -2060,6 +2060,26 @@ def test_backwards_compatibility_wrapper(): smart_open.smart_open(fpath, unsupported_keyword_param=123) +def test_read_file_descriptor(): + with smart_open.open(__file__) as fin: + expected = fin.read() + + fd = os.open(__file__, os.O_RDONLY) + with smart_open.open(fd) as fin: + actual = fin.read() + + assert actual == expected + + +def test_write_file_descriptor(): + with tempfile.NamedTemporaryFile() as tmp: + with smart_open.open(os.open(tmp.name, os.O_WRONLY), 'wt') as fout: + fout.write("hello world") + + with smart_open.open(tmp, 'rt') as fin: + assert fin.read() == "hello world" + + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main() From b828569856754b972dbd4b2c928efa8910c975a4 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 10 Oct 2021 22:42:15 +0900 Subject: [PATCH 2/4] skip fd tests on win --- smart_open/tests/test_smart_open.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 9f2b881f..766ffd91 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -2060,6 +2060,7 @@ def test_backwards_compatibility_wrapper(): smart_open.smart_open(fpath, unsupported_keyword_param=123) +@pytest.mark.skipif(os.name == "nt", reason="this test does not work on Windows") def test_read_file_descriptor(): with smart_open.open(__file__) as fin: expected = fin.read() @@ -2071,6 +2072,7 @@ def test_read_file_descriptor(): assert actual == expected +@pytest.mark.skipif(os.name == "nt", reason="this test does not work on Windows") def test_write_file_descriptor(): with tempfile.NamedTemporaryFile() as tmp: with smart_open.open(os.open(tmp.name, os.O_WRONLY), 'wt') as fout: From 49d021322d9712fc7265ef0a7213162f1d821fb8 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 10 Oct 2021 23:26:23 +0900 Subject: [PATCH 3/4] open by filename directly --- smart_open/tests/test_smart_open.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 766ffd91..14651a9a 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -2078,7 +2078,7 @@ def test_write_file_descriptor(): with smart_open.open(os.open(tmp.name, os.O_WRONLY), 'wt') as fout: fout.write("hello world") - with smart_open.open(tmp, 'rt') as fin: + with smart_open.open(tmp.name, 'rt') as fin: assert fin.read() == "hello world" From bc55dd0d0ea814492dad9ebdb969a0d434a65377 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 10 Oct 2021 23:26:39 +0900 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c8b5edd..c3c8a0e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Use pytest instead of parameterizedtestcase (PR [#657](https://github.com/RaRe-Technologies/smart_open/pull/657), [@mpenkov](https://github.com/mpenkov)) - Support container client and blob client for azure blob storage (PR [#652](https://github.com/RaRe-Technologies/smart_open/pull/652), [@cbare](https://github.com/cbare)) +- Support working directly with file descriptors (PR [#659](https://github.com/RaRe-Technologies/smart_open/pull/659), [@mpenkov](https://github.com/mpenkov)) # 5.2.1, 28 August 2021