From 8f149d8e5ed8e5ae6e1bb7242757bd714fb472b6 Mon Sep 17 00:00:00 2001 From: Kirk Hansen Date: Fri, 25 Aug 2017 00:39:12 -0500 Subject: [PATCH] TST: Made s3 related tests mock boto Kept a couple around for testing things like accessing a private bucket as that's hard to mock. Try the pip counterparts Some more merge request changes --- appveyor.yml | 6 ++ ci/install_circle.sh | 1 + ci/install_travis.sh | 2 +- ci/requirements-2.7_WIN.pip | 0 ci/requirements-3.6_NUMPY_DEV.pip | 0 ci/requirements-3.6_WIN.pip | 0 ci/requirements_dev.txt | 1 + pandas/tests/io/parser/data/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/parser/data/tips.csv.gz | Bin 0 -> 1740 bytes pandas/tests/io/parser/test_network.py | 100 ++++++++++++++--------- pandas/tests/io/test_excel.py | 58 ++++++------- tox.ini | 1 + 12 files changed, 102 insertions(+), 67 deletions(-) create mode 100644 ci/requirements-2.7_WIN.pip create mode 100644 ci/requirements-3.6_NUMPY_DEV.pip create mode 100644 ci/requirements-3.6_WIN.pip create mode 100644 pandas/tests/io/parser/data/tips.csv.bz2 create mode 100644 pandas/tests/io/parser/data/tips.csv.gz diff --git a/appveyor.yml b/appveyor.yml index 65e62f887554e..a1f8886f6d068 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -74,12 +74,18 @@ install: # create our env - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas + - cmd: pip install moto - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" + # add some pip only reqs to the env + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip + - cmd: echo "installing requirements from %REQ%" + - cmd: pip install -Ur %REQ% + # build em using the local source checkout in the correct windows env - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 29ca69970104b..fd79f907625e9 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 time conda install -n pandas pytest>=3.1.0 || exit 1 source activate pandas +time pip install moto || exit 1 # build but don't install echo "[build em]" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index d26689f2e6b4b..b85263daa1eac 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -104,7 +104,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest>=3.1.0 -time pip install pytest-xdist +time pip install pytest-xdist moto if [ "$LINT" ]; then conda install flake8 diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt index c7190c506ba18..dbc4f6cbd6509 100644 --- a/ci/requirements_dev.txt +++ b/ci/requirements_dev.txt @@ -5,3 +5,4 @@ cython pytest>=3.1.0 pytest-cov flake8 +moto diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6 GIT binary patch literal 1316 zcmV+<1>5>UT4*^jL0KkKS@WgHUjPpp-+%xR00n>G1qTcuzHi=eU zr8L5NfM@_34^ia=nn@4@ngc)pXm>4ki*@VR?6|SRoF#LZ+TkL$)Z)}c<#mBig_KMX zruJeOi&bv;V=*04xP@hDQp(ibF*2pqxW%nuMr@F6Gix?+fsH|aKayy7UwGa_-`dVs zYfM$)R7$k8wpC6gfmM#M!-v|)iP#1h4cPkh|rkJNTD3*02| zUew#%bX<$c*~vCvMH>_%oV^S&6a+#ukskADG3ECrBRBE^v4aChy? zvDazQUv(jtyOFJd%+RitVq;Fo?$ru4tx8y4RWLAw3OQ&r5YZ6QA(|s=%EqEnNvFyDucBxbJ63X0f6|L)lrAb?vZoDHd%^>qwTK z8M-E+R_N`PibFFSF!cCl2Z7}>xeJ`*<3&DX2?dNalnbN*vYZ7QTLis}+CyTbyv{>s zl!hm_!_I4KZE}>uSzBr=*www83fCT-SPZ&+p@dCkFG(R6{D)ETHdAf-8>fnW#-GXdM4pE5VK!{hIp z4{*7H7hK39V*E6-z)7yKmA;#^4 z#PVN7@@@mJL*EhAX#`mH2SAk2lkhNXJBL>BHS&`^r&JS)>z58UjoYiOCqY*zmz*K6 z1SFlk-!Cn`6liVaz=_bPhSWpu1LJ>%Cxlk3T;w2WIQ0LRX3%vrxUPW z8d$X$uIXc_sI{9kN=EXFie6i&h29y!AZcb)r??rFOLu%3R3P<2gpt$oRe1O6gk~8T zu3j+kM{M-PhPbG60sxBGP*RgE)NL!@Yr%+f=+n7l@JL0;84IYj5yo31-0M)BHp<)Q zzkK_6UA}%i|M3mU6cFV&C+q8L8zqA-)xv!>^z@7=Fgi9q_iLEzwg+!G2w0Ts9jf*M z64F>g8RrtB4m-(FnM=?v>|@tRdI1$7H2kMsssN5^GU(*!z`p{ft@Qr;@_OlzdPSq# z=N&m=z8R{dV?dV-Iwe>fL1(0h{JJ}+<6sZ(@ePlLCs;FVmX?rYPxs1DA(^whpU+gQLdb{bOK!0;_ zkQW*TzXUDj{aqJ}zCZT`AFw?MCRq$YLmUun3sPt|TJ|F1y1->qh6EwxZc5srUOK?6 zfIOA24Gq;xs91xZWkXI-kgFkpK@VM+dImzp9WY2eRlGn`2@#FO*RJOK&vl0mX5&x| zsC*~R>SEi53Wfn0JC1s5&DImTC?CmS%t%KJn8SnJ{vz7Tu;z{(oX1Uj?2r-D=FHLg z#Nx)*tqL1*0`$uskSzVPPI~Zw87JK{kHS;|mjvLPazsSBBGTEE(XeUKcA)Oa1!1&{ ziGd~d!Xgpq$A_L=)+{U2btCFAD_NiGHe#QuSj!mhzmK3jN5V2e#ai_;@D^ZS3^-kH z6guhK*S?INWvhtT8n-^y8%I8HZbrKc2koF=btc|VG&cU-G4a~h=kf7qrTv=Ut%I~S zEXzKRMTs`<+xJ_K%nb(}Ie8d~S$W#@BiccQnPiO(+O^Yd9ou<9tf*;o$=WeUAZqAG zyzyj!F_p;rzPQ?Y92;+@To35Y<=xOSTm>@DJ;}6?*Lzr=TgaG9BIbr{y}$`b72TY! zqYYtgpVJv*bV|eFpvy$Pm>HFtbh_Na_)b19LfLd-0+3QVd;u1iG1e^0tsmq27&c@f zqhD+!jOz~T@n@5$<6yJqL9iFfH0&B9mSe(Zd*O_H&`()&cv#qX>*83gV@pnS)Uxa6 zh&!W4Kw{zbuyG*bJ30s^kL%1hKc#3Y!TLa1|HGI+q2~|%8;0j+sEAdd#O2^p#_J5{ zqk&o!uGkw*Xq2S)W72nPTLSJR3mF;xQOdr}*By;^C3XK=k7;*$ zylq6O8Vck|96AOM^M;z(GGMh%)?T{?8o*P+jIR3%VPB~S`#)bVj@Hps@zV;k&aoL? zJT_x>_m~9QgT~p5h literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 3344243f8137a..27cc708889fa2 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -4,13 +4,20 @@ Tests parsers ability to read and parse non-local files and hence require a network connection to be read. """ - import os + import pytest +import moto import pandas.util.testing as tm from pandas import DataFrame from pandas.io.parsers import read_csv, read_table +from pandas.compat import BytesIO + + +@pytest.fixture(scope='module') +def tips_file(): + return os.path.join(tm.get_data_path(), 'tips.csv') @pytest.fixture(scope='module') @@ -19,6 +26,40 @@ def salaries_table(): return read_table(path) +@pytest.fixture(scope='module') +def s3_resource(tips_file): + pytest.importorskip('s3fs') + moto.mock_s3().start() + + test_s3_files = [ + ('tips.csv', tips_file), + ('tips.csv.gz', tips_file + '.gz'), + ('tips.csv.bz2', tips_file + '.bz2'), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, 'rb') as f: + conn.Bucket(bucket_name).put_object( + Key=s3_key, + Body=f) + + boto3 = pytest.importorskip('boto3') + # see gh-16135 + bucket = 'pandas-test' + + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket=bucket) + add_tips_files(bucket) + + conn.create_bucket(Bucket='cant_get_it', ACL='private') + add_tips_files('cant_get_it') + + yield conn + + moto.mock_s3().stop() + + @pytest.mark.network @pytest.mark.parametrize( "compression,extension", @@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode, class TestS3(object): - - def setup_method(self, method): - try: - import s3fs # noqa - except ImportError: - pytest.skip("s3fs not installed") - @tm.network def test_parse_public_s3_bucket(self): + pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion + # can probably mock this though. for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) @@ -74,8 +111,8 @@ def test_parse_public_s3_bucket(self): assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, s3_resource): + # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -83,8 +120,7 @@ def test_parse_public_s3n_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, s3_resource): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -92,8 +128,7 @@ def test_parse_public_s3a_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) @@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, s3_resource): # Read with a chunksize chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_chunked_python(self): + def test_parse_public_s3_bucket_chunked_python(self, s3_resource): # Read with a chunksize using the Python parser chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) @@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, s3_resource): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') @@ -160,8 +191,7 @@ def test_infer_s3_compression(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) @@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_s3_fails(self): + def test_s3_fails(self, s3_resource): with pytest.raises(IOError): read_csv('s3://nyqpug/asdf.csv') @@ -180,21 +209,18 @@ def test_s3_fails(self): with pytest.raises(IOError): read_csv('s3://cant_get_it/') - @tm.network - def boto3_client_s3(self): + def test_read_csv_handles_boto_s3_object(self, + s3_resource, + tips_file): # see gh-16135 - # boto3 is a dependency of s3fs - import boto3 - client = boto3.client("s3") - - key = "/tips.csv" - bucket = "pandas-test" - s3_object = client.get_object(Bucket=bucket, Key=key) + s3_object = s3_resource.meta.client.get_object( + Bucket='pandas-test', + Key='tips.csv') - result = read_csv(s3_object["Body"]) + result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8') assert isinstance(result, DataFrame) assert not result.empty - expected = read_csv(tm.get_data_path('tips.csv')) + expected = read_csv(tips_file) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 92147b46097b8..6a399f41975e5 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1,33 +1,32 @@ # pylint: disable=E1101 - -from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems -from datetime import datetime, date, time -import sys +import functools +import operator import os +import sys +import warnings +from datetime import datetime, date, time from distutils.version import LooseVersion from functools import partial - -import warnings from warnings import catch_warnings -import operator -import functools -import pytest -from numpy import nan import numpy as np +import pytest +from numpy import nan +import moto import pandas as pd +import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex -from pandas.io.formats.excel import ExcelFormatter -from pandas.io.parsers import read_csv +from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems +from pandas.core.config import set_option, get_option +from pandas.io.common import URLError from pandas.io.excel import ( ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer, _Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter ) -from pandas.io.common import URLError +from pandas.io.formats.excel import ExcelFormatter +from pandas.io.parsers import read_csv from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -from pandas.core.config import set_option, get_option -import pandas.util.testing as tm def _skip_if_no_xlrd(): @@ -67,13 +66,6 @@ def _skip_if_no_excelsuite(): _skip_if_no_openpyxl() -def _skip_if_no_s3fs(): - try: - import s3fs # noqa - except ImportError: - pytest.skip('s3fs not installed, skipping') - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -605,14 +597,22 @@ def test_read_from_http_url(self): local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) - @tm.network(check_before_test=True) def test_read_from_s3_url(self): - _skip_if_no_s3fs() - - url = ('s3://pandas-test/test1' + self.ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1') - tm.assert_frame_equal(url_table, local_table) + boto3 = pytest.importorskip('boto3') + pytest.importorskip('s3fs') + + with moto.mock_s3(): + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket="pandas-test") + file_name = os.path.join(self.dirpath, 'test1' + self.ext) + with open(file_name, 'rb') as f: + conn.Bucket("pandas-test").put_object(Key="test1" + self.ext, + Body=f) + + url = ('s3://pandas-test/test1' + self.ext) + url_table = read_excel(url) + local_table = self.get_exceldf('test1') + tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow def test_read_from_file_url(self): diff --git a/tox.ini b/tox.ini index 45ad7fc451e76..f055251581a93 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,7 @@ deps = xlrd six sqlalchemy + moto # cd to anything but the default {toxinidir} which # contains the pandas subdirectory and confuses