From e0a1a24edfd1f0a453645510e8a4d79368f30de9 Mon Sep 17 00:00:00 2001 From: Andre Burkovski Date: Thu, 16 Apr 2020 22:53:52 +0200 Subject: [PATCH 1/8] fix(smart_open_lib): add missing newline parameter to _shortcut_open --- smart_open/smart_open_lib.py | 5 +++-- smart_open/tests/test_smart_open.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 02f3b18f..b5b647c7 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -187,6 +187,7 @@ def open( buffering=buffering, encoding=encoding, errors=errors, + newline=newline ) if fobj is not None: return fobj @@ -316,6 +317,7 @@ def _shortcut_open( buffering=-1, encoding=None, errors=None, + newline=None ): """Try to open the URI using the standard library io.open function. @@ -331,7 +333,6 @@ def _shortcut_open( :param str uri: A string indicating what to open. :param str mode: The mode to pass to the open function. - :param dict kw: :returns: The opened file :rtype: file """ @@ -347,7 +348,7 @@ def _shortcut_open( if extension in compression.get_supported_extensions() and not ignore_ext: return None - open_kwargs = {} + open_kwargs = {'newline': newline} if encoding is not None: open_kwargs['encoding'] = encoding diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index be9d3858..a931e4d8 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -7,6 +7,7 @@ # import bz2 +import csv import io import unittest import logging @@ -1051,6 +1052,21 @@ def test_append_binary_absolute_path(self): mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1) fout.write(self.as_bytes) + def test_compatibility_with_csv_under_windows(self): + rows = [{'c1': 'a1', 'c2': 'b1'}, {'c1': 'a2', 'c2': 'b2'}] + + expected = 'c1,c2\na1,b1\na2,b2\n' + + with smart_open.open('test_smart_open.csv', 'w+', newline='\n') as f: + out = csv.DictWriter(f, fieldnames=['c1', 'c2']) + out.writeheader() + out.writerows(rows) + + with open('test_smart_open.csv', 'r') as f: + content = f.read() + assert content == expected + os.remove('test_smart_open.csv') + @mock.patch('boto3.Session') def test_s3_mode_mock(self, mock_session): """Are s3:// open modes passed correctly?""" From cc0d28189e99c38e56d9905f5e585ce3df47549f Mon Sep 17 00:00:00 2001 From: Andre Burkovski Date: Thu, 16 Apr 2020 22:53:52 +0200 Subject: [PATCH 2/8] fix(smart_open_lib): add missing newline parameter to _shortcut_open --- smart_open/smart_open_lib.py | 5 +++-- smart_open/tests/test_smart_open.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 02f3b18f..b5b647c7 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -187,6 +187,7 @@ def open( buffering=buffering, encoding=encoding, errors=errors, + newline=newline ) if fobj is not None: return fobj @@ -316,6 +317,7 @@ def _shortcut_open( buffering=-1, encoding=None, errors=None, + newline=None ): """Try to open the URI using the standard library io.open function. @@ -331,7 +333,6 @@ def _shortcut_open( :param str uri: A string indicating what to open. :param str mode: The mode to pass to the open function. - :param dict kw: :returns: The opened file :rtype: file """ @@ -347,7 +348,7 @@ def _shortcut_open( if extension in compression.get_supported_extensions() and not ignore_ext: return None - open_kwargs = {} + open_kwargs = {'newline': newline} if encoding is not None: open_kwargs['encoding'] = encoding diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index be9d3858..a931e4d8 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -7,6 +7,7 @@ # import bz2 +import csv import io import unittest import logging @@ -1051,6 +1052,21 @@ def test_append_binary_absolute_path(self): mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1) fout.write(self.as_bytes) + def test_compatibility_with_csv_under_windows(self): + rows = [{'c1': 'a1', 'c2': 'b1'}, {'c1': 'a2', 'c2': 'b2'}] + + expected = 'c1,c2\na1,b1\na2,b2\n' + + with smart_open.open('test_smart_open.csv', 'w+', newline='\n') as f: + out = csv.DictWriter(f, fieldnames=['c1', 'c2']) + out.writeheader() + out.writerows(rows) + + with open('test_smart_open.csv', 'r') as f: + content = f.read() + assert content == expected + os.remove('test_smart_open.csv') + @mock.patch('boto3.Session') def test_s3_mode_mock(self, mock_session): """Are s3:// open modes passed correctly?""" From 257cab7d113a516ac777b1fac69f6a2b82699cdf Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 17 Apr 2020 10:47:21 +0900 Subject: [PATCH 3/8] minor fixes to get tests to pass --- smart_open/smart_open_lib.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index b5b647c7..72316c05 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -187,7 +187,7 @@ def open( buffering=buffering, encoding=encoding, errors=errors, - newline=newline + newline=newline, ) if fobj is not None: return fobj @@ -317,7 +317,7 @@ def _shortcut_open( buffering=-1, encoding=None, errors=None, - newline=None + newline=None, ): """Try to open the URI using the standard library io.open function. @@ -348,11 +348,12 @@ def _shortcut_open( if extension in compression.get_supported_extensions() and not ignore_ext: return None - open_kwargs = {'newline': newline} - + open_kwargs = {} if encoding is not None: open_kwargs['encoding'] = encoding mode = mode.replace('b', '') + if newline is not None: + open_kwargs['newline'] = newline # # binary mode of the builtin/stdlib open function doesn't take an errors argument From 9099ef2c46e7cebd251bcb875db21cad8af8006b Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 17 Apr 2020 10:55:53 +0900 Subject: [PATCH 4/8] unskip S3 test --- smart_open/tests/test_smart_open.py | 1 - 1 file changed, 1 deletion(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index a931e4d8..f5712cd9 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -887,7 +887,6 @@ def test_s3_read_moto(self): self.assertEqual(content[14:], smart_open_object.read()) # read the rest - @unittest.skip('seek functionality for S3 currently disabled because of Issue #152') @mock_s3 def test_s3_seek_moto(self): """Does seeking in S3 files work correctly?""" From 5cdb796269daef31f7850008d24f853b3c7ad222 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 17 Apr 2020 10:56:03 +0900 Subject: [PATCH 5/8] adjust newline unit tests --- smart_open/tests/test_smart_open.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index f5712cd9..8c67e896 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -1051,20 +1051,28 @@ def test_append_binary_absolute_path(self): mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1) fout.write(self.as_bytes) - def test_compatibility_with_csv_under_windows(self): - rows = [{'c1': 'a1', 'c2': 'b1'}, {'c1': 'a2', 'c2': 'b2'}] + def test_newline(self): + with mock.patch(_BUILTIN_OPEN, mock.Mock(return_value=self.bytesio)) as mock_open: + with smart_open.smart_open("/some/file.txt", "wb+", newline='\n') as fout: + mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1, newline='\n') + + def test_newline_csv(self): + # + # See https://github.com/RaRe-Technologies/smart_open/issues/477 + # + rows = [{'name': 'alice', 'color': 'aqua'}, {'name': 'bob', 'color': 'blue'}] + expected = 'name,color\nalice,aqua\nbob,blue\n' - expected = 'c1,c2\na1,b1\na2,b2\n' + with tempfile.NamedTemporaryFile() as tmp: + with smart_open.open(tmp.name, 'w+', newline='\n') as fout: + out = csv.DictWriter(fout, fieldnames=['name', 'color']) + out.writeheader() + out.writerows(rows) - with smart_open.open('test_smart_open.csv', 'w+', newline='\n') as f: - out = csv.DictWriter(f, fieldnames=['c1', 'c2']) - out.writeheader() - out.writerows(rows) + with open(tmp.name, 'r') as fin: + content = fin.read() - with open('test_smart_open.csv', 'r') as f: - content = f.read() assert content == expected - os.remove('test_smart_open.csv') @mock.patch('boto3.Session') def test_s3_mode_mock(self, mock_session): From 6f7aa5ed3ae8799f16af7b3810efe07b23505174 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 17 Apr 2020 10:59:58 +0900 Subject: [PATCH 6/8] update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 343e5e9b..82752e2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ # Unreleased + - Correctly pass `newline` parameter to built-in `open` function (PR [#478](https://github.com/RaRe-Technologies/smart_open/pull/478), [@burkovae](https://github.com/burkovae)) + # 1.11.1, 8 Apr 2020 - Add missing boto dependency (Issue [#468](https://github.com/RaRe-Technologies/smart_open/issues/468)) From 87365fe9e6f252471d089ee61ba4dfbd4d02de5d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 17 Apr 2020 11:09:12 +0900 Subject: [PATCH 7/8] fix flake8 --- smart_open/tests/test_smart_open.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 8c67e896..2e09c5d1 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -1053,8 +1053,8 @@ def test_append_binary_absolute_path(self): def test_newline(self): with mock.patch(_BUILTIN_OPEN, mock.Mock(return_value=self.bytesio)) as mock_open: - with smart_open.smart_open("/some/file.txt", "wb+", newline='\n') as fout: - mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1, newline='\n') + smart_open.smart_open("/some/file.txt", "wb+", newline='\n') + mock_open.assert_called_with("/some/file.txt", "wb+", buffering=-1, newline='\n') def test_newline_csv(self): # From 888b3b0775cabaafc040abc38d50693fb904a44c Mon Sep 17 00:00:00 2001 From: Andre Burkovski Date: Sat, 25 Apr 2020 15:21:31 +0200 Subject: [PATCH 8/8] fix: merge conflicts in changelog --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 741b10fd..19bf2fb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,8 @@ # Unreleased -<<<<<<< HEAD - Correctly pass `newline` parameter to built-in `open` function (PR [#478](https://github.com/RaRe-Technologies/smart_open/pull/478), [@burkovae](https://github.com/burkovae)) -======= - Prevent smart_open from writing to logs on import (PR [#476](https://github.com/RaRe-Technologies/smart_open/pull/476), [@mpenkov](https://github.com/mpenkov)) - Modify setup.py to explicitly support Py3.5 and above (PR [#471](https://github.com/RaRe-Technologies/smart_open/pull/471), [@Amertz08](https://github.com/Amertz08)) ->>>>>>> 931a5f5fc008d727e8a3b1863ceca3d54215f567 # 1.11.1, 8 Apr 2020