diff --git a/appveyor.yml b/appveyor.yml
index 65e62f887554e..a1f8886f6d068 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -74,12 +74,18 @@ install:
   # create our env
   - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
   - cmd: activate pandas
+  - cmd: pip install moto
   - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
   - cmd: echo "installing requirements from %REQ%"
   - cmd: conda install -n pandas --file=%REQ%
   - cmd: conda list -n pandas
   - cmd: echo "installing requirements from %REQ% - done"
 
+  # add some pip only reqs to the env
+  - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
+  - cmd: echo "installing requirements from %REQ%"
+  - cmd: pip install -Ur %REQ%
+
   # build em using the local source checkout in the correct windows env
   - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
 
diff --git a/ci/install_circle.sh b/ci/install_circle.sh
index 29ca69970104b..fd79f907625e9 100755
--- a/ci/install_circle.sh
+++ b/ci/install_circle.sh
@@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
 time conda install -n pandas pytest>=3.1.0 || exit 1
 
 source activate pandas
+time pip install moto || exit 1
 
 # build but don't install
 echo "[build em]"
diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index d26689f2e6b4b..b85263daa1eac 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then
 fi
 
 time conda install -n pandas pytest>=3.1.0
-time pip install pytest-xdist
+time pip install pytest-xdist moto
 
 if [ "$LINT" ]; then
     conda install flake8
diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index c7190c506ba18..dbc4f6cbd6509 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -5,3 +5,4 @@ cython
 pytest>=3.1.0
 pytest-cov
 flake8
+moto
diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2
new file mode 100644
index 0000000000000..1452896b05e9d
Binary files /dev/null and b/pandas/tests/io/parser/data/tips.csv.bz2 differ
diff --git a/pandas/tests/io/parser/data/tips.csv.gz b/pandas/tests/io/parser/data/tips.csv.gz
new file mode 100644
index 0000000000000..3a131068b2a38
Binary files /dev/null and b/pandas/tests/io/parser/data/tips.csv.gz differ
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index 3344243f8137a..27cc708889fa2 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -4,13 +4,20 @@
 Tests parsers ability to read and parse non-local files
 and hence require a network connection to be read.
 """
-
 import os
+
 import pytest
+import moto
 
 import pandas.util.testing as tm
 from pandas import DataFrame
 from pandas.io.parsers import read_csv, read_table
+from pandas.compat import BytesIO
+
+
+@pytest.fixture(scope='module')
+def tips_file():
+    return os.path.join(tm.get_data_path(), 'tips.csv')
 
 
 @pytest.fixture(scope='module')
@@ -19,6 +26,40 @@ def salaries_table():
     return read_table(path)
 
 
+@pytest.fixture(scope='module')
+def s3_resource(tips_file):
+    pytest.importorskip('s3fs')
+    moto.mock_s3().start()
+
+    test_s3_files = [
+        ('tips.csv', tips_file),
+        ('tips.csv.gz', tips_file + '.gz'),
+        ('tips.csv.bz2', tips_file + '.bz2'),
+    ]
+
+    def add_tips_files(bucket_name):
+        for s3_key, file_name in test_s3_files:
+            with open(file_name, 'rb') as f:
+                conn.Bucket(bucket_name).put_object(
+                    Key=s3_key,
+                    Body=f)
+
+    boto3 = pytest.importorskip('boto3')
+    # see gh-16135
+    bucket = 'pandas-test'
+
+    conn = boto3.resource("s3", region_name="us-east-1")
+    conn.create_bucket(Bucket=bucket)
+    add_tips_files(bucket)
+
+    conn.create_bucket(Bucket='cant_get_it', ACL='private')
+    add_tips_files('cant_get_it')
+
+    yield conn
+
+    moto.mock_s3().stop()
+
+
 @pytest.mark.network
 @pytest.mark.parametrize(
     "compression,extension",
@@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
 
 
 class TestS3(object):
-
-    def setup_method(self, method):
-        try:
-            import s3fs  # noqa
-        except ImportError:
-            pytest.skip("s3fs not installed")
-
     @tm.network
     def test_parse_public_s3_bucket(self):
+        pytest.importorskip('s3fs')
+        # more of an integration test due to the not-public contents portion
+        # can probably mock this though.
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext,
                           compression=comp)
@@ -74,8 +111,8 @@ def test_parse_public_s3_bucket(self):
             assert not df.empty
             tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
 
-    @tm.network
-    def test_parse_public_s3n_bucket(self):
+    def test_parse_public_s3n_bucket(self, s3_resource):
+        # Read from AWS s3 as "s3n" URL
         df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
         assert isinstance(df, DataFrame)
@@ -83,8 +120,7 @@ def test_parse_public_s3n_bucket(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
-    def test_parse_public_s3a_bucket(self):
+    def test_parse_public_s3a_bucket(self, s3_resource):
         # Read from AWS s3 as "s3a" URL
         df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
         assert isinstance(df, DataFrame)
@@ -92,8 +128,7 @@ def test_parse_public_s3a_bucket(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
-    def test_parse_public_s3_bucket_nrows(self):
+    def test_parse_public_s3_bucket_nrows(self, s3_resource):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext,
                           nrows=10, compression=comp)
@@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
-    def test_parse_public_s3_bucket_chunked(self):
+    def test_parse_public_s3_bucket_chunked(self, s3_resource):
         # Read with a chunksize
         chunksize = 5
         local_tips = read_csv(tm.get_data_path('tips.csv'))
@@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self):
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    @tm.network
-    def test_parse_public_s3_bucket_chunked_python(self):
+    def test_parse_public_s3_bucket_chunked_python(self, s3_resource):
         # Read with a chunksize using the Python parser
         chunksize = 5
         local_tips = read_csv(tm.get_data_path('tips.csv'))
@@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self):
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    @tm.network
-    def test_parse_public_s3_bucket_python(self):
+    def test_parse_public_s3_bucket_python(self, s3_resource):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                           compression=comp)
@@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')), df)
 
-    @tm.network
-    def test_infer_s3_compression(self):
+    def test_infer_s3_compression(self, s3_resource):
         for ext in ['', '.gz', '.bz2']:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                           compression='infer')
@@ -160,8 +191,7 @@ def test_infer_s3_compression(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')), df)
 
-    @tm.network
-    def test_parse_public_s3_bucket_nrows_python(self):
+    def test_parse_public_s3_bucket_nrows_python(self, s3_resource):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                           nrows=10, compression=comp)
@@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self):
         tm.assert_frame_equal(read_csv(
             tm.get_data_path('tips.csv')).iloc[:10], df)
 
-    @tm.network
-    def test_s3_fails(self):
+    def test_s3_fails(self, s3_resource):
         with pytest.raises(IOError):
             read_csv('s3://nyqpug/asdf.csv')
 
@@ -180,21 +209,18 @@ def test_s3_fails(self):
         with pytest.raises(IOError):
             read_csv('s3://cant_get_it/')
 
-    @tm.network
-    def boto3_client_s3(self):
+    def test_read_csv_handles_boto_s3_object(self,
+                                             s3_resource,
+                                             tips_file):
         # see gh-16135
-        # boto3 is a dependency of s3fs
-        import boto3
-        client = boto3.client("s3")
-
-        key = "/tips.csv"
-        bucket = "pandas-test"
-        s3_object = client.get_object(Bucket=bucket, Key=key)
+        s3_object = s3_resource.meta.client.get_object(
+            Bucket='pandas-test',
+            Key='tips.csv')
 
-        result = read_csv(s3_object["Body"])
+        result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8')
 
         assert isinstance(result, DataFrame)
         assert not result.empty
 
-        expected = read_csv(tm.get_data_path('tips.csv'))
+        expected = read_csv(tips_file)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 92147b46097b8..6a399f41975e5 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -1,33 +1,32 @@
 # pylint: disable=E1101
-
-from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
-from datetime import datetime, date, time
-import sys
+import functools
+import operator
 import os
+import sys
+import warnings
+from datetime import datetime, date, time
 from distutils.version import LooseVersion
 from functools import partial
-
-import warnings
 from warnings import catch_warnings
-import operator
-import functools
-import pytest
 
-from numpy import nan
 import numpy as np
+import pytest
+from numpy import nan
 
+import moto
 import pandas as pd
+import pandas.util.testing as tm
 from pandas import DataFrame, Index, MultiIndex
-from pandas.io.formats.excel import ExcelFormatter
-from pandas.io.parsers import read_csv
+from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems
+from pandas.core.config import set_option, get_option
+from pandas.io.common import URLError
 from pandas.io.excel import (
     ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer,
     _Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter
 )
-from pandas.io.common import URLError
+from pandas.io.formats.excel import ExcelFormatter
+from pandas.io.parsers import read_csv
 from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf
-from pandas.core.config import set_option, get_option
-import pandas.util.testing as tm
 
 
 def _skip_if_no_xlrd():
@@ -67,13 +66,6 @@ def _skip_if_no_excelsuite():
     _skip_if_no_openpyxl()
 
 
-def _skip_if_no_s3fs():
-    try:
-        import s3fs  # noqa
-    except ImportError:
-        pytest.skip('s3fs not installed, skipping')
-
-
 _seriesd = tm.getSeriesData()
 _tsd = tm.getTimeSeriesData()
 _frame = DataFrame(_seriesd)[:10]
@@ -605,14 +597,22 @@ def test_read_from_http_url(self):
         local_table = self.get_exceldf('test1')
         tm.assert_frame_equal(url_table, local_table)
 
-    @tm.network(check_before_test=True)
     def test_read_from_s3_url(self):
-        _skip_if_no_s3fs()
-
-        url = ('s3://pandas-test/test1' + self.ext)
-        url_table = read_excel(url)
-        local_table = self.get_exceldf('test1')
-        tm.assert_frame_equal(url_table, local_table)
+        boto3 = pytest.importorskip('boto3')
+        pytest.importorskip('s3fs')
+
+        with moto.mock_s3():
+            conn = boto3.resource("s3", region_name="us-east-1")
+            conn.create_bucket(Bucket="pandas-test")
+            file_name = os.path.join(self.dirpath, 'test1' + self.ext)
+            with open(file_name, 'rb') as f:
+                conn.Bucket("pandas-test").put_object(Key="test1" + self.ext,
+                                                      Body=f)
+
+            url = ('s3://pandas-test/test1' + self.ext)
+            url_table = read_excel(url)
+            local_table = self.get_exceldf('test1')
+            tm.assert_frame_equal(url_table, local_table)
 
     @pytest.mark.slow
     def test_read_from_file_url(self):
diff --git a/tox.ini b/tox.ini
index 45ad7fc451e76..f055251581a93 100644
--- a/tox.ini
+++ b/tox.ini
@@ -19,6 +19,7 @@ deps =
     xlrd
    six
    sqlalchemy
+    moto
 
 # cd to anything but the default {toxinidir} which
 # contains the pandas subdirectory and confuses
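Note for reviewers: the core idea of this change is to replace real S3 calls with moto's in-memory S3 backend, so the parser and Excel tests no longer need credentials or a network connection. Below is a minimal, self-contained sketch of that pattern; it is not part of the patch, the bucket name and CSV payload are invented for illustration, and it assumes moto, boto3 and s3fs are installed at the versions this branch adds to the CI requirements.

```python
# Illustrative sketch only -- mirrors the pattern used by the s3_resource
# fixture in test_network.py and by test_read_from_s3_url in test_excel.py.
import boto3
import moto
import pandas as pd
import pandas.util.testing as tm
from pandas.compat import BytesIO  # io.BytesIO works equally well

with moto.mock_s3():
    # Inside this block every boto3/s3fs call is served by moto's
    # in-memory S3 fake, so no AWS account or network access is used.
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket="pandas-test")
    conn.Bucket("pandas-test").put_object(Key="tips.csv",
                                          Body=b"a,b\n1,2\n3,4\n")

    # read_csv resolves the s3:// URL through s3fs against the mocked endpoint
    df = pd.read_csv("s3://pandas-test/tips.csv")

    # the same object fetched through the raw boto3 client, as in
    # test_read_csv_handles_boto_s3_object
    obj = conn.meta.client.get_object(Bucket="pandas-test", Key="tips.csv")
    df2 = pd.read_csv(BytesIO(obj["Body"].read()), encoding="utf8")

    tm.assert_frame_equal(df, df2)
```

The module-scoped `s3_resource` fixture is the same idea with explicit `moto.mock_s3().start()` / `.stop()` calls instead of a `with` block, so every test in the module shares one fake `pandas-test` bucket (plus the private `cant_get_it` bucket). That is why the per-test `@tm.network` markers can be dropped; only `test_parse_public_s3_bucket` keeps `@tm.network`, since it still reads the real public bucket as an integration check.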