Skip to content

Commit

Permalink
Replace snappy with cramjam (capitalone#1091)
Browse files (browse the repository at this point in the history)
* add downloads tile (capitalone#1085)

* Replace snappy with cramjam

* Delete test_no_snappy

---------

Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
  • Loading branch information
2 people authored and abajpai15 committed Apr 1, 2024
1 parent 9cd11db commit b301dd9
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ repos:
pyarrow>=1.0.1,
chardet>=3.0.4,
fastavro>=1.0.0.post1,
- python-snappy>=0.5.4,
+ cramjam>=2.7.0,
charset-normalizer>=1.3.6,
psutil>=4.0.0,
scipy>=1.4.1,
Expand Down
16 changes: 0 additions & 16 deletions dataprofiler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,6 @@
from .validators.base_validators import Validator
from .version import __version__

- try:
- import snappy
- except ImportError:
- import warnings
-
- warnings.warn(
- "Snappy must be installed to use parquet/avro datasets."
- "\n\n"
- "For macOS use Homebrew:\n"
- "\t`brew install snappy`"
- "\n\n"
- "For linux use apt-get:\n`"
- "\tsudo apt-get -y install libsnappy-dev`\n",
- ImportWarning,
- )
-

def set_seed(seed=None):
# also check it's an integer
Expand Down
40 changes: 0 additions & 40 deletions dataprofiler/tests/test_data_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,46 +56,6 @@ def test_data_profiling(self):
self.assertIsNotNone(profile.profile)
self.assertIsNotNone(profile.report())

- def test_no_snappy(self):
- import importlib
- import sys
- import types
-
- orig_import = __import__
- # necessary for any wrapper around the library to test if snappy caught
- # as an issue
-
- def reload_data_profiler():
- """Recursively reload modules."""
- sys_modules = sys.modules.copy()
- for module_name, module in sys_modules.items():
- # Only reload top level of the dataprofiler
- if "dataprofiler" in module_name and len(module_name.split(".")) < 3:
- if isinstance(module, types.ModuleType):
- importlib.reload(module)
-
- def import_mock(name, *args, **kwargs):
- if name == "snappy":
- raise ImportError("test")
- return orig_import(name, *args, **kwargs)
-
- with mock.patch("builtins.__import__", side_effect=import_mock):
- with self.assertWarns(ImportWarning) as w:
- import dataprofiler
-
- reload_data_profiler()
-
- self.assertEqual(
- str(w.warning),
- "Snappy must be installed to use parquet/avro datasets."
- "\n\n"
- "For macOS use Homebrew:\n"
- "\t`brew install snappy`"
- "\n\n"
- "For linux use apt-get:\n`"
- "\tsudo apt-get -y install libsnappy-dev`\n",
- )
-
def test_no_tensorflow(self):
import sys

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pytz>=2020.1
pyarrow>=1.0.1
chardet>=3.0.4
fastavro>=1.0.0.post1
- python-snappy>=0.5.4
+ cramjam>=2.7.0
charset-normalizer>=1.3.6
psutil>=4.0.0
scipy>=1.10.0
Expand Down

0 comments on commit b301dd9

Please sign in to comment.