diff --git a/README.rst b/README.rst index 898f61e3..914df5f4 100644 --- a/README.rst +++ b/README.rst @@ -255,9 +255,114 @@ Since going over all (or select) keys in an S3 bucket is a very common operation >>> prefix = 'annual/monthly_rain/' >>> for key, content in s3_iter_bucket(bucket, prefix=prefix, accept_key=lambda key: '/201' in key, workers=1, key_limit=3): ... print(key, round(len(content) / 2**20)) - annual/monthly_rain/2010.monthly_rain.nc 14 - annual/monthly_rain/2011.monthly_rain.nc 14 - annual/monthly_rain/2012.monthly_rain.nc 14 + annual/monthly_rain/2010.monthly_rain.nc 13 + annual/monthly_rain/2011.monthly_rain.nc 13 + annual/monthly_rain/2012.monthly_rain.nc 13 + + +Migrating to the New `open` Function +------------------------------------ + +Since 1.8.1, there is a `smart_open.open` function that replaces `smart_open.smart_open`. +The new function offers several advantages over the old one: + +- 100% compatible with the built-in `open` function (aka `io.open`): it accepts all + the parameters that the built-in `open` accepts. +- The default open mode is now "r", the same as for the built-in `open`. + The default for the old `smart_open.smart_open` function used to be "rb". +- Fully documented keyword parameters (try `help("smart_open.open")`) + +The instructions below will help you migrate to the new function painlessly. + +First, update your imports: + +.. code-block:: python + + >>> from smart_open import smart_open # before + >>> from smart_open import open # after + +In general, `smart_open` uses `io.open` directly, where possible, so if your +code already uses `open` for local file I/O, then it will continue to work. +If you want to continue using the built-in `open` function for e.g. debugging, +then you can `import smart_open` and use `smart_open.open`. + +**The default read mode is now "r" (read text) by default.** +If your code was implicitly relying on the default mode being "rb" (read +binary), then you'll need to update it and pass "r" explicitly. + +Before: + +.. code-block:: python + + >>> buf = b'' + >>> buf += smart_open('s3://commoncrawl/robots.txt').read(32) + +After: + +.. code-block:: python + + >>> buf = b'' + >>> buf += open('s3://commoncrawl/robots.txt', 'rb').read(32) + +The `ignore_extension` keyword parameter is now called `ignore_ext`. +It behaves identically otherwise. + +The most significant change is in the handling on keyword parameters for the +transport layer, e.g. HTTP, S3, etc. The old function accepted these directly: + +.. code-block:: python + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> smart_open(url, 'r', session=session).read(32) + 'first line\nsecond line\nthird lin' + +The new function accepts a `transport_params` keyword argument. It's a dict. +Put your transport parameters in that dictionary. + +.. code-block:: python + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> params = {'session': session} + >>> open(url, 'r', transport_params=params).read(32) + 'first line\nsecond line\nthird lin' + +Renamed parameters: + +- `s3_upload` ➡ `multipart_upload_kwargs` +- `s3_session` ➡ `session` + +Removed parameters: + +- `profile_name` + +**The `profile_name` parameter has been removed.** +Pass an entire boto3.Session object instead. + +Before: + +.. code-block:: python + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> smart_open(url, 'r', profile_name='smart_open').read(32) + 'first line\nsecond line\nthird lin' + +After: + +.. code-block:: python + + >>> url = 's3://smart-open-py37-benchmark-results/test.txt' + >>> session = boto3.Session(profile_name='smart_open') + >>> params = {'session': session} + >>> open(url, 'r', transport_params=params).read(32) + 'first line\nsecond line\nthird lin' + +See `help("smart_open.open")` for the full list of acceptable parameter names, +or view the help online `here `__. + +If you pass an invalid parameter names, the `open` function will warn you about it. +Keep an eye on your logs for WARNING messages from smart_open. Comments, bug reports --------------------- diff --git a/release/prepare.sh b/release/prepare.sh index e17bbc23..028998fd 100644 --- a/release/prepare.sh +++ b/release/prepare.sh @@ -32,6 +32,8 @@ set +u # work around virtualenv awkwardness source sandbox.venv/bin/activate set -u +python -m doctest ../README.rst + cd .. pip install -e .[test] # for smart_open pip install .[test] # for gensim @@ -56,6 +58,16 @@ read -p "Press Enter to continue..." $EDITOR CHANGELOG.md git commit CHANGELOG.md -m "updated CHANGELOG.md for version $version" +python -c 'help("smart_open")' > help.txt + +# +# The below command will fail if there are no changes to help.txt. +# We can safely ignore that failure. +# +set +e +git commit help.txt -m "updated help.txt for version $version" +set -e + echo "Have a look at the current branch, and if all looks good, run merge.sh" cd "$script_dir" diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 2ec806f0..26bc7316 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -380,9 +380,23 @@ def open( } +_MIGRATION_NOTES_URL = ( + 'https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst' + '#migrating-to-the-new-open-function' +) + + def smart_open(uri, mode="rb", **kw): - """Deprecated, use smart_open.open instead.""" - logger.warning('this function is deprecated, use smart_open.open instead') + """Deprecated, use smart_open.open instead. + + See the migration instructions: %s + + """ % _MIGRATION_NOTES_URL + + warnings.warn( + 'This function is deprecated, use smart_open.open instead. ' + 'See the migration notes for details: %s' % _MIGRATION_NOTES_URL + ) # # The new function uses a shorter name for this parameter, handle it separately.