Skip to content

Commit

Permalink
Removes native xz support
Browse files Browse the repository at this point in the history
Updates `register_compressor` example to show xz decompression.
  • Loading branch information
tdhopper committed Apr 10, 2019
1 parent ce3d8ac commit 102d5ef
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 51 deletions.
28 changes: 18 additions & 10 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ Other examples of URLs that ``smart_open`` accepts::
file:///home/user/file.bz2
[ssh|scp|sftp]://username@host//path/file
[ssh|scp|sftp]://username@host/path/file
file:///home/user/file.xz

.. _doctools_after_examples:

Expand Down Expand Up @@ -161,20 +160,29 @@ The tests are also run automatically with `Travis CI <https://travis-ci.org/RaRe
Supported Compression Formats
-----------------------------

``smart_open`` allows reading and writing gzip, bzip2 and xz files.
``smart_open`` allows reading and writing gzip and bzip2 files.
They are transparently handled over HTTP, S3, and other protocols, too, based on the extension of the file being opened.
You can easily add support for other file extensions and compression formats:
You can easily add support for other file extensions and compression formats.
For example, to open xz-compressed files:

.. code-block:: python
def _handle_lzma(file_obj, mode):
import lzma
return lzma.LZMAFile(filename=file_obj, mode=mode, format=lzma.FORMAT_ALONE)
import lzma, os
from smart_open import open, register_compressor
register_compressor('.lzma', _handle_lzma)
with open('file.lzma', ...) as fin:
pass
def _handle_xz(file_obj, mode):
return lzma.LZMAFile(filename=file_obj, mode=mode, format=lzma.FORMAT_XZ)
register_compressor('.xz', _handle_xz)
data_path = './smart_open/tests/test_data/crime-and-punishment.txt.xz'
with open(data_path) as f:
crime_and_punishment = f.read()
``lzma`` is in the standard library in Python 3.3 and greater.
For 2.7, use `backports.lzma`_.

.. _backports.lzma: https://pypi.org/project/backports.lzma/

Transport-specific Options
--------------------------
Expand Down
3 changes: 1 addition & 2 deletions help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ FUNCTIONS

- ``.gz``
- ``.bz2``
- ``.xz``


The function depends on the file extension to determine the appropriate codec.

Parameters
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def read(fname):
'bz2file',
'requests',
'boto3',
'backports.lzma;python_version<"3.3"',
],
tests_require=tests_require,
extras_require={
Expand Down
15 changes: 0 additions & 15 deletions smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,24 +107,11 @@ def _handle_gzip(file_obj, mode):
return gzip.GzipFile(fileobj=file_obj, mode=mode)


def _handle_xz(file_obj, mode):
#
# Delay import of compressor library until we actually need it
#
try:
import lzma
except ImportError:
# py<3.3
from backports import lzma
return lzma.LZMAFile(filename=file_obj, mode=mode, format=lzma.FORMAT_XZ)


#
# NB. avoid using lambda here to make stack traces more readable.
#
register_compressor('.bz2', _handle_bz2)
register_compressor('.gz', _handle_gzip)
register_compressor('.xz', _handle_xz)


Uri = collections.namedtuple(
Expand Down Expand Up @@ -224,7 +211,6 @@ def open(
- ``.gz``
- ``.bz2``
- ``.xz``
The function depends on the file extension to determine the appropriate codec.
Expand Down Expand Up @@ -596,7 +582,6 @@ def _parse_uri(uri_as_string):
* file:///home/user/file.bz2
* [ssh|scp|sftp]://username@host//path/file
* [ssh|scp|sftp]://username@host/path/file
* file:///home/user/file.xz
"""
if os.name == 'nt':
Expand Down
24 changes: 1 addition & 23 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def _test_compressed_http(self, suffix, query):
smart_open_object = smart_open.smart_open(
'http://127.0.0.1/data%s%s' % (suffix, '?some_param=some_val' if query else ''))

# decompress the xz and get the same md5 hash
# decompress the file and check that it matches the raw data
self.assertEqual(smart_open_object.read(), raw_data)

@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
Expand All @@ -268,10 +268,6 @@ def test_http_bz2(self):
"""Can open bzip2 via http?"""
self._test_compressed_http(".bz2", False)

def test_http_xz(self):
"""Can open xz via http?"""
self._test_compressed_http(".xz", False)

@unittest.skipIf(six.PY2, 'gzip support for Py2 is not implemented yet')
def test_http_gz_query(self):
"""Can open gzip via http with a query appended to URI?"""
Expand All @@ -281,10 +277,6 @@ def test_http_bz2_query(self):
"""Can open bzip2 via http with a query appended to URI?"""
self._test_compressed_http(".bz2", True)

def test_http_xz_query(self):
"""Can open xz via http with a query appended to URI?"""
self._test_compressed_http(".xz", True)


def make_buffer(cls=six.BytesIO, initial_value=None, name=None):
"""
Expand Down Expand Up @@ -1014,20 +1006,6 @@ def test_write_read_bz2(self):
"""Can write and read bz2?"""
self.write_read_assertion('.bz2')

def test_write_read_xz(self):
"""Can write and read xz?"""
self.write_read_assertion('.xz')

def test_read_real_xz(self):
"""Can read a real xz file."""
base_path = os.path.join(CURR_DIR, 'test_data/crime-and-punishment.txt')
head_path = os.path.join(CURR_DIR, 'test_data/crime-and-punishment.txt.xz')
with smart_open.smart_open(head_path) as f:
smart_data = f.read()
with open(base_path, 'rb') as f:
orig_data = f.read()
self.assertEqual(smart_data, orig_data)


class MultistreamsBZ2Test(unittest.TestCase):
"""
Expand Down

0 comments on commit 102d5ef

Please sign in to comment.