From 6433b626230fb6fa7dcf0a4198611f69206ef2c3 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 28 May 2019 21:26:44 +0900 Subject: [PATCH 01/10] updated release script to refresh help.txt --- release/prepare.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/release/prepare.sh b/release/prepare.sh index e17bbc23..a9bc7180 100644 --- a/release/prepare.sh +++ b/release/prepare.sh @@ -56,6 +56,16 @@ read -p "Press Enter to continue..." $EDITOR CHANGELOG.md git commit CHANGELOG.md -m "updated CHANGELOG.md for version $version" +python -c 'help("smart_open")' > help.txt + +# +# The below command will fail if there are no changes to help.txt. +# We can safely ignore that failure. +# +set +e +git commit help.txt -m "updated help.txt for version $version" +set -e + echo "Have a look at the current branch, and if all looks good, run merge.sh" cd "$script_dir" From 88f213f82877e1ee97178742605b811e9bdf4814 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 28 May 2019 21:28:55 +0900 Subject: [PATCH 02/10] use warnings.warn instead of logger.warning --- smart_open/smart_open_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 2ec806f0..f022ef87 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -382,7 +382,7 @@ def open( def smart_open(uri, mode="rb", **kw): """Deprecated, use smart_open.open instead.""" - logger.warning('this function is deprecated, use smart_open.open instead') + warnings.warn('this function is deprecated, use smart_open.open instead') # # The new function uses a shorter name for this parameter, handle it separately. 
From 948838bd0acdb1e916c1542aa47560e9af5e1717 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 10:36:44 +0900 Subject: [PATCH 03/10] add migration notes --- README.rst | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/README.rst b/README.rst index 898f61e3..acadb58c 100644 --- a/README.rst +++ b/README.rst @@ -259,6 +259,90 @@ Since going over all (or select) keys in an S3 bucket is a very common operation annual/monthly_rain/2011.monthly_rain.nc 14 annual/monthly_rain/2012.monthly_rain.nc 14 +Migrating to the new open function +---------------------------------- + +Since 1.8.1, there is a `smart_open.open` function that replaces `smart_open.smart_open`. +The new function offers several advantages over the old one: + +- 100% compatible with the built-in open function (aka io.open): it accepts all + the parameters that the built-in open accepts. +- Default open mode is now "r", the same as for the built-in open +- Fully documented keyword parameters (try `help("smart_open.open")`) + +These instructions will help you migrate to the new function painlessly. + +First, update your imports: + + >>> from smart_open import smart_open # before + >>> from smart_open import open # after + +In general, `smart_open` uses `io.open` directly, where possible, so if your +code already uses `open` for local file I/O, then it will continue to work. +If you want to continue using the built-in `open` function for e.g. debugging, +then you can `import smart_open` and use `smart_open.open`. + +**The default read mode is now "r" (read text).** +If your code was implicitly relying on the default mode being "rb" (read +binary), then you'll need to update it and pass "rb" explicitly. 
+ +Before: + + >>> buf = b'' + >>> buf += smart_open('s3://commoncrawl/robots.txt').read(32) + +After: + + >>> buf = b'' + >>> buf += open('s3://commoncrawl/robots.txt', 'rb').read(32) + +The `ignore_extension` keyword parameter is now called `ignore_ext`. +It behaves identically otherwise. + +The most significant change is in the handling on keyword parameters for the +transport layer, e.g. HTTP, S3, etc. The old function accepted these directly: + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> smart_open(url, 'r', session=session).read(32) + +The new function accepts a `transport_params` keyword argument. It's a dict. +Put your transport parameters in that dictionary. + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> params = {'session': session} + >>> open(url, 'r', transport_params=params).read(32) + +Renamed parameters: + +- `s3_upload` ➡ `multipart_upload_kwargs` +- `s3_session` ➡ `session` + +Removed parameters: + +- `profile_name` + +**The `profile_name` parameter has been removed.** +Pass an entire boto3.Session object instead. + +Before: + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> smart_open(url, 'r', profile_name='smart_open').read(32) + +After: + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> params = {'session': session} + >>> smart_open(url, 'r', transport_params=params).read(32) + +See `help("smart_open.open")` for the full list of acceptable parameter names. + +If you pass an invalid parameter names, the `open` function will warn you about it. +Keep an eye on your logs for WARNING messages from smart_open. 
+ Comments, bug reports --------------------- From c57ae73c3580e8ce73a785e275e95ca5c3faf1a9 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 11:06:16 +0900 Subject: [PATCH 04/10] update README.rst to pass doctests --- README.rst | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index acadb58c..b32464da 100644 --- a/README.rst +++ b/README.rst @@ -255,9 +255,10 @@ Since going over all (or select) keys in an S3 bucket is a very common operation >>> prefix = 'annual/monthly_rain/' >>> for key, content in s3_iter_bucket(bucket, prefix=prefix, accept_key=lambda key: '/201' in key, workers=1, key_limit=3): ... print(key, round(len(content) / 2**20)) - annual/monthly_rain/2010.monthly_rain.nc 14 - annual/monthly_rain/2011.monthly_rain.nc 14 - annual/monthly_rain/2012.monthly_rain.nc 14 + annual/monthly_rain/2010.monthly_rain.nc 13 + annual/monthly_rain/2011.monthly_rain.nc 13 + annual/monthly_rain/2012.monthly_rain.nc 13 + Migrating to the new open function ---------------------------------- @@ -305,6 +306,7 @@ transport layer, e.g. HTTP, S3, etc. The old function accepted these directly: >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> session = boto3.Session(profile_name='smart_open') >>> smart_open(url, 'r', session=session).read(32) + 'first line\nsecond line\nthird lin' The new function accepts a `transport_params` keyword argument. It's a dict. Put your transport parameters in that dictionary. @@ -313,6 +315,7 @@ Put your transport parameters in that dictionary. 
>>> session = boto3.Session(profile_name='smart_open') >>> params = {'session': session} >>> open(url, 'r', transport_params=params).read(32) + 'first line\nsecond line\nthird lin' Renamed parameters: @@ -330,13 +333,15 @@ Before: >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> smart_open(url, 'r', profile_name='smart_open').read(32) + 'first line\nsecond line\nthird lin' After: >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> session = boto3.Session(profile_name='smart_open') >>> params = {'session': session} - >>> smart_open(url, 'r', transport_params=params).read(32) + >>> open(url, 'r', transport_params=params).read(32) + 'first line\nsecond line\nthird lin' See `help("smart_open.open")` for the full list of acceptable parameter names. From 9a85d055ba7140865335bcf1cf40389b6ac85478 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 11:07:42 +0900 Subject: [PATCH 05/10] add doctest to release script --- release/prepare.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/release/prepare.sh b/release/prepare.sh index a9bc7180..028998fd 100644 --- a/release/prepare.sh +++ b/release/prepare.sh @@ -32,6 +32,8 @@ set +u # work around virtualenv awkwardness source sandbox.venv/bin/activate set -u +python -m doctest ../README.rst + cd .. 
pip install -e .[test] # for smart_open pip install .[test] # for gensim From 88618e01e4d67e131e0f9881c6d940cac83cb306 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 11:21:13 +0900 Subject: [PATCH 06/10] link to migration notes from docstring and warning --- smart_open/smart_open_lib.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index f022ef87..35a93dda 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -380,9 +380,23 @@ def open( } +_MIGRATION_NOTES_URL = ( + 'https://github.com/rare-technologies/smart_open/blob/master/README.rst' + '#migrating-to-the-new-open-function' +) + + def smart_open(uri, mode="rb", **kw): - """Deprecated, use smart_open.open instead.""" - warnings.warn('this function is deprecated, use smart_open.open instead') + """Deprecated, use smart_open.open instead. + + See the migration notes for details: %s + + """ % _MIGRATION_NOTES_URL + + warnings.warn( + 'This function is deprecated, use smart_open.open instead. ' + 'See the migration notes for details: %s' % _MIGRATION_NOTES_URL + ) # # The new function uses a shorter name for this parameter, handle it separately. From beb3857d468ab001b16674d15ca49c258aebfb98 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 11:24:44 +0900 Subject: [PATCH 07/10] fix RST formatting --- README.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.rst b/README.rst index b32464da..6631e3c9 100644 --- a/README.rst +++ b/README.rst @@ -275,6 +275,8 @@ These instructions will help you migrate to the new function painlessly. First, update your imports: +.. code-block:: python + >>> from smart_open import smart_open # before >>> from smart_open import open # after @@ -289,11 +291,15 @@ binary), then you'll need to update it and pass "r" explicitly. Before: +.. 
code-block:: python + >>> buf = b'' >>> buf += smart_open('s3://commoncrawl/robots.txt').read(32) After: +.. code-block:: python + >>> buf = b'' >>> buf += open('s3://commoncrawl/robots.txt', 'rb').read(32) @@ -303,6 +309,8 @@ It behaves identically otherwise. The most significant change is in the handling on keyword parameters for the transport layer, e.g. HTTP, S3, etc. The old function accepted these directly: +.. code-block:: python + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> session = boto3.Session(profile_name='smart_open') >>> smart_open(url, 'r', session=session).read(32) @@ -311,6 +319,8 @@ transport layer, e.g. HTTP, S3, etc. The old function accepted these directly: The new function accepts a `transport_params` keyword argument. It's a dict. Put your transport parameters in that dictionary. +.. code-block:: python + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> session = boto3.Session(profile_name='smart_open') >>> params = {'session': session} @@ -331,12 +341,16 @@ Pass an entire boto3.Session object instead. Before: +.. code-block:: python + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> smart_open(url, 'r', profile_name='smart_open').read(32) 'first line\nsecond line\nthird lin' After: +.. 
code-block:: python + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' >>> session = boto3.Session(profile_name='smart_open') >>> params = {'session': session} From 5877193305563c6929ad59091c98dac7b4dc476a Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 11:29:57 +0900 Subject: [PATCH 08/10] minor fixup --- README.rst | 5 +++-- smart_open/smart_open_lib.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 6631e3c9..502b6eae 100644 --- a/README.rst +++ b/README.rst @@ -260,7 +260,7 @@ Since going over all (or select) keys in an S3 bucket is a very common operation annual/monthly_rain/2012.monthly_rain.nc 13 -Migrating to the new open function +Migrating to the New open Function ---------------------------------- Since 1.8.1, there is a `smart_open.open` function that replaces `smart_open.smart_open`. @@ -357,7 +357,8 @@ After: >>> open(url, 'r', transport_params=params).read(32) 'first line\nsecond line\nthird lin' -See `help("smart_open.open")` for the full list of acceptable parameter names. +See `help("smart_open.open")` for the full list of acceptable parameter names, +or view the help online `here <https://github.com/RaRe-Technologies/smart_open/blob/master/help.txt>`__. If you pass an invalid parameter names, the `open` function will warn you about it. Keep an eye on your logs for WARNING messages from smart_open. 
diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 35a93dda..486d1e83 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -381,7 +381,7 @@ def open( _MIGRATION_NOTES_URL = ( - 'https://github.com/rare-technologies/smart_open/blob/master/README.rst' + 'https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst' '#migrating-to-the-new-open-function' ) From 5485c86a9cd42978e21443e0ed8ab3891f53c0b7 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 21:06:06 +0900 Subject: [PATCH 09/10] Update README.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Radim Řehůřek --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 502b6eae..11659c84 100644 --- a/README.rst +++ b/README.rst @@ -260,7 +260,7 @@ Since going over all (or select) keys in an S3 bucket is a very common operation annual/monthly_rain/2012.monthly_rain.nc 13 -Migrating to the New open Function +Migrating to the new `open` function ---------------------------------- Since 1.8.1, there is a `smart_open.open` function that replaces `smart_open.smart_open`. 
From 69fa8de8d89e01b5c482b957d4d48b006918e69f Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Wed, 29 May 2019 23:00:15 +0900 Subject: [PATCH 10/10] respond to review comments --- README.rst | 13 +++++++------ smart_open/smart_open_lib.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 11659c84..914df5f4 100644 --- a/README.rst +++ b/README.rst @@ -260,18 +260,19 @@ Since going over all (or select) keys in an S3 bucket is a very common operation annual/monthly_rain/2012.monthly_rain.nc 13 -Migrating to the new `open` function ----------------------------------- +Migrating to the New `open` Function +------------------------------------ Since 1.8.1, there is a `smart_open.open` function that replaces `smart_open.smart_open`. The new function offers several advantages over the old one: -- 100% compatible with the built-in open function (aka io.open): it accepts all - the parameters that the built-in open accepts. -- Default open mode is now "r", the same as for the built-in open +- 100% compatible with the built-in `open` function (aka `io.open`): it accepts all + the parameters that the built-in `open` accepts. +- The default open mode is now "r", the same as for the built-in `open`. + The default for the old `smart_open.smart_open` function used to be "rb". - Fully documented keyword parameters (try `help("smart_open.open")`) -These instructions will help you migrate to the new function painlessly. +The instructions below will help you migrate to the new function painlessly. First, update your imports: diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 486d1e83..26bc7316 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -389,7 +389,7 @@ def open( def smart_open(uri, mode="rb", **kw): """Deprecated, use smart_open.open instead. - See the migration notes for details: %s + See the migration instructions: %s """ % _MIGRATION_NOTES_URL