diff --git a/build-dist.sh b/build-dist.sh index ccc37bc..71ed47c 100755 --- a/build-dist.sh +++ b/build-dist.sh @@ -7,14 +7,14 @@ echo "Building Disdat package for local installation or PyPi . . ." # and then: git tag # Remove the prior tar ball from the context.template -rm -rf disdatluigi/infrastructure/dockerizer/context.template/disdat-luigi-*.tar.gz -rm -rf dist/disdat-luigi-*.tar.gz +rm -rf disdatluigi/infrastructure/dockerizer/context.template/disdat_luigi-*.tar.gz +rm -rf dist/disdat_luigi-*.tar.gz # Create a new sdist python setup.py sdist # Copy over to the context.template. -cp dist/disdat-luigi-*.tar.gz disdatluigi/infrastructure/dockerizer/context.template/. +cp dist/disdat_luigi-*.tar.gz disdatluigi/infrastructure/dockerizer/context.template/. # Create a new sdist that will have that tar.gz in the template python setup.py sdist @@ -25,7 +25,7 @@ if false; then #twine upload --repository-url https://test.pypi.org/legacy/ dist/disdat-*.tar.gz # Test: pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple disdat # now do it for real - twine upload dist/disdat-luigi-*.tar.gz + twine upload dist/disdat_luigi-*.tar.gz fi echo "Finished" diff --git a/disdatluigi/apply.py b/disdatluigi/apply.py index 1b6b210..5eed26e 100644 --- a/disdatluigi/apply.py +++ b/disdatluigi/apply.py @@ -69,10 +69,10 @@ def apply(output_bundle, pipe_params, pipe_cls, input_tags, output_tags, force, _logger.debug("incremental_pull {}".format(incremental_pull)) if incremental_push: - _logger.warn("incremental_push {}".format(incremental_push)) + _logger.warning("incremental_push {}".format(incremental_push)) if incremental_pull: - _logger.warn("incremental_pull {}".format(incremental_pull)) + _logger.warning("incremental_pull {}".format(incremental_pull)) pfs = fs.DisdatFS() diff --git a/disdatluigi/entrypoints/sagemaker_ep.py b/disdatluigi/entrypoints/sagemaker_ep.py index da573ad..03c1075 100644 --- a/disdatluigi/entrypoints/sagemaker_ep.py +++ b/disdatluigi/entrypoints/sagemaker_ep.py @@ -90,7 +90,7 @@ def train(): _logger.error("Disdat SageMaker train entrypoint failed.") sys.exit(os.EX_IOERR) elif args.purpose == 'serve': - _logger.warn("Disdat does not yet support SageMaker serve.") + _logger.warning("Disdat does not yet support SageMaker serve.") sys.exit(os.EX_UNAVAILABLE) else: _logger.error("Disdat SageMaker invoked entrypoint with {}, not 'train' or 'serve'".format(args.purpose)) diff --git a/disdatluigi/utility/aws_batch.py b/disdatluigi/utility/aws_batch.py index 59770bf..1f2bbf3 100644 --- a/disdatluigi/utility/aws_batch.py +++ b/disdatluigi/utility/aws_batch.py @@ -164,7 +164,7 @@ def ecr_create_fq_respository_name(repository_name, policy_resource_package=None ) repository_metadata = response['repositories'][0] elif e.response['Error']['Code'] == 'AccessDeniedException': - _logger.warn("Error [AccessDeniedException] when creating repo {}, trying to continue...".format(repository_name)) + _logger.warning("Error [AccessDeniedException] when creating repo {}, trying to continue...".format(repository_name)) else: raise e return repository_metadata['repositoryUri'] diff --git a/setup.py b/setup.py index 38bcf89..b6b5a12 100644 --- a/setup.py +++ b/setup.py @@ -90,10 +90,10 @@ # If <= means higher versions broke something. install_requires=[ - 'disdat>=1.0.0', - 'luigi>=3.0,<=3.1', + 'disdat>=1.1.3,<1.2', + 'luigi>=3.0,<3.6', 'boto3>=1.14.49,<2.0', - 'docker>=4.1.0,<4.4.0', + 'docker>=7.0.0,<7.2.0', ], # List additional groups of dependencies here (e.g. development @@ -108,7 +108,8 @@ 'pylint', 'coverage', 'tox', - 'moto', + 'moto>=5', + 'fastparquet', 's3fs<=0.4.2' # 0.5.0 breaks with aiobotocore and missing AWS headers ], 'rel': [ diff --git a/tests/bundles/test_arg_capture.py b/tests/bundles/test_arg_capture.py index 3a5a156..1bd37e5 100644 --- a/tests/bundles/test_arg_capture.py +++ b/tests/bundles/test_arg_capture.py @@ -17,7 +17,7 @@ import disdat.api as dsdt_api from disdatluigi.pipe import PipeTask import disdatluigi.api as api -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT test_luigi_args_data = {'str_arg': 'some string', 'int_arg': 10, @@ -39,7 +39,7 @@ def pipe_run(self): return True -def test_luigi_args(run_test): +def test_luigi_args(): """ Create a task, store args, retrieve from bundle api. Pass in python objects as the values for Luigi parameters. Stored as serialized json objects. Bundle presents the parameters diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..8075dfe --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,58 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import pytest +import disdat.api as api + +TEST_CONTEXT = '___test_context___' + +""" +NOTE: When testing we try to use Moto to mock-out the s3 calls. Pyarrow sidesteps boto by using its own S3FileSystem. This is a problem +because that c++ code is no longer accessing s3 URLs through the botocore Python library. So you get errors about INVALID CREDENTIALS. +The primary solution for testing is to simply use fastparquet instead of pyarrow. The other alternative is to use the moto_server or, as the +comment below suggests, use fsspec's s3fs. At this time (7-2024) we simply use fastparquet for testing. + +From: https://issues.apache.org/jira/browse/ARROW-16437?page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel&focusedCommentId=17530795#comment-17530795 + + I don't think you can use the "mock_s3" method of moto directly with our S3 filesystem integration (as Antoine said, we don't use boto). + A possible way to do this is to use fsspec's s3fs, and then pass this filesystem object (and pyarrow will wrap that in a PyFileSystem), + but that adds another layer of indirection which you don't need here (and which can be another source of failures). + Another option is to use the "moto_server" feature of moto (http://docs.getmoto.org/en/latest/docs/getting_started.html#stand-alone-server-mode), + which gives you an endpoint url, that can be used to construct a pyarrow S3FileSystem that interacts with the moto server. + This is basically the approach that we use ourselves (but with MinIO instead of moto), and eg also dask switched from mock_s3 to + moto_server in their tests (see eg https://github.com/dask/dask/blob/4d6a5f08c45be56302f696ca4ef6038a1cd1e734/dask/bytes/tests/test_s3.py#L84) + +""" + +@pytest.fixture(autouse=True, scope='module') +def aws_credentials(): + """Mocked AWS Credentials for moto.""" + _aws_credentials() + +def _aws_credentials(): + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + +@pytest.fixture(autouse=True, scope='function') +def run_test(): + # Remove test context before running test + if TEST_CONTEXT in api.ls_contexts(): + api.delete_context(context_name=TEST_CONTEXT) + api.context(context_name=TEST_CONTEXT) + yield + api.delete_context(context_name=TEST_CONTEXT) diff --git a/tests/functional/common.py b/tests/functional/common.py deleted file mode 100644 index ef2b59b..0000000 --- a/tests/functional/common.py +++ /dev/null @@ -1,34 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import pytest - -import disdat.api as api - -TEST_CONTEXT = '___test_context___' - - -@pytest.fixture(autouse=True) -def run_test(): - # Remove test context before running test - setup() - yield - api.delete_context(context_name=TEST_CONTEXT) - - -def setup(): - if TEST_CONTEXT in api.ls_contexts(): - api.delete_context(context_name=TEST_CONTEXT) - - api.context(context_name=TEST_CONTEXT) diff --git a/tests/functional/test_api_exit.py b/tests/functional/test_api_exit.py index 23a75db..03a7093 100644 --- a/tests/functional/test_api_exit.py +++ b/tests/functional/test_api_exit.py @@ -16,11 +16,10 @@ from disdatluigi.pipe import PipeTask from disdatluigi.common import ApplyError -import disdat.api as disdat_api import disdatluigi.api as api import luigi -from tests.functional.common import TEST_CONTEXT, run_test +from tests.conftest import TEST_CONTEXT TEST_NAME = 'test_bundle' diff --git a/tests/functional/test_api_run.py b/tests/functional/test_api_run.py index 6debef6..e917381 100644 --- a/tests/functional/test_api_run.py +++ b/tests/functional/test_api_run.py @@ -19,8 +19,7 @@ import docker import pytest - -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT from tests.functional.common_tasks import COMMON_DEFAULT_ARGS import disdat.api as disdat_api import disdatluigi.api as api @@ -106,7 +105,7 @@ def test_run_local_container(run_test, build_container_setup_only): assert b_a_f.uuid != b_a_f2.uuid -#@moto.mock_s3 +#@moto.mock_aws def manual_test_run_aws_batch(run_test, build_container_setup_only): """ Incomplete test. The container code itself needs to have its S3 access mocked out. Here we are testing manually diff --git a/tests/functional/test_external_bundle.py b/tests/functional/test_external_bundle.py index d6f8c30..b0634de 100644 --- a/tests/functional/test_external_bundle.py +++ b/tests/functional/test_external_bundle.py @@ -20,7 +20,7 @@ import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT # autouse fixture to setup / tear down context +from tests.conftest import TEST_CONTEXT def test(run_test): diff --git a/tests/functional/test_external_dep.py b/tests/functional/test_external_dep.py index 53a19a3..a3b854e 100644 --- a/tests/functional/test_external_dep.py +++ b/tests/functional/test_external_dep.py @@ -19,7 +19,7 @@ import disdat.api as api import disdatluigi.api as dlapi from disdatluigi.common import ApplyError -from tests.functional.common import run_test, TEST_CONTEXT # autouse fixture to setup / tear down context +from tests.conftest import TEST_CONTEXT EXT_BUNDLE_NAME='ext_bundle_human_name' BUNDLE_CONTENTS=list(range(9)) diff --git a/tests/functional/test_force_one_and_all.py b/tests/functional/test_force_one_and_all.py index 5504c94..5e31c89 100644 --- a/tests/functional/test_force_one_and_all.py +++ b/tests/functional/test_force_one_and_all.py @@ -17,7 +17,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT TEST_NAME = 'test_bundle' diff --git a/tests/functional/test_inc_pull.py b/tests/functional/test_inc_pull.py index 258cb85..4bbf0dd 100644 --- a/tests/functional/test_inc_pull.py +++ b/tests/functional/test_inc_pull.py @@ -27,7 +27,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import TEST_CONTEXT +from tests.conftest import TEST_CONTEXT TEST_REMOTE = '__test_remote_context__' @@ -82,11 +82,11 @@ def pipe_run(self, b=None): return {'file': [target.path]} -@moto.mock_s3 +@moto.mock_aws def test_add_with_treat_as_bundle(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) - + # Setup moto s3 resources s3_client = boto3.client('s3') s3_resource = boto3.resource('s3') diff --git a/tests/functional/test_inc_push.py b/tests/functional/test_inc_push.py index 7e79f83..65ce3d1 100644 --- a/tests/functional/test_inc_push.py +++ b/tests/functional/test_inc_push.py @@ -27,7 +27,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import TEST_CONTEXT +from tests.conftest import TEST_CONTEXT TEST_REMOTE = '__test_remote_context__' TEST_BUCKET = 'test-bucket' @@ -79,7 +79,7 @@ def pipe_run(self, b=None): raise Exception -@moto.mock_s3 +@moto.mock_aws def test_add_with_treat_as_bundle(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) diff --git a/tests/functional/test_managed.py b/tests/functional/test_managed.py index 8fccfab..d85ce04 100644 --- a/tests/functional/test_managed.py +++ b/tests/functional/test_managed.py @@ -21,14 +21,17 @@ """ import boto3 import moto +from moto import mock_aws import pandas as pd import pytest import os +from tests.conftest import _aws_credentials + from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT TEST_REMOTE = '__test_remote_context__' TEST_BUCKET = 'test-bucket' @@ -132,7 +135,7 @@ def test_non_managed_local(): 'Local file should be present in bundle' -@moto.mock_s3 +@moto.mock_aws def test_remote_push_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -165,7 +168,7 @@ def test_remote_push_managed_s3(): assert output_file in keys, 'Pipeline should have pushed file' -@moto.mock_s3 +@moto.mock_aws def test_remote_push_non_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -205,7 +208,7 @@ def test_remote_push_non_managed_s3(): assert output_file in keys, 'Pipeline should have pushed file' -@moto.mock_s3 +@moto.mock_aws def test_remote_no_push_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -226,7 +229,7 @@ def test_remote_no_push_managed_s3(): dlapi.apply(TEST_CONTEXT, ManagedS3) -@moto.mock_s3 +@moto.mock_aws def test_remote_no_push_non_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -266,7 +269,7 @@ def test_no_remote_push_managed_s3(): dlapi.apply(TEST_CONTEXT, ManagedS3, incremental_push=True) -@moto.mock_s3 +@moto.mock_aws def test_no_remote_push_non_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -301,7 +304,7 @@ def test_no_remote_no_push_managed_s3(): dlapi.apply(TEST_CONTEXT, ManagedS3) -@moto.mock_s3 +@moto.mock_aws def test_no_remote_no_push_non_managed_s3(): api.delete_context(TEST_CONTEXT) api.context(context_name=TEST_CONTEXT) @@ -328,7 +331,6 @@ def test_no_remote_no_push_non_managed_s3(): if __name__ == '__main__': - #test_remote_push_managed_s3() pytest.main([__file__]) diff --git a/tests/functional/test_mark_force.py b/tests/functional/test_mark_force.py index 6f27794..bac8fee 100644 --- a/tests/functional/test_mark_force.py +++ b/tests/functional/test_mark_force.py @@ -16,7 +16,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT TEST_NAME = 'test_bundle' diff --git a/tests/functional/test_pipeline.py b/tests/functional/test_pipeline.py index 330467e..6be009a 100644 --- a/tests/functional/test_pipeline.py +++ b/tests/functional/test_pipeline.py @@ -17,7 +17,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT class A(PipeTask): diff --git a/tests/functional/test_requires.py b/tests/functional/test_requires.py index 30c3465..e34bbf4 100644 --- a/tests/functional/test_requires.py +++ b/tests/functional/test_requires.py @@ -16,7 +16,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT """ Purpose of this test is to show that if you return nothing, you diff --git a/tests/functional/test_reuse_logic.py b/tests/functional/test_reuse_logic.py index 0956ca5..2790296 100644 --- a/tests/functional/test_reuse_logic.py +++ b/tests/functional/test_reuse_logic.py @@ -17,7 +17,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT, setup +from tests.conftest import TEST_CONTEXT """ This set of tests verifies the behavior of Disdat's re-execution / data re-use logic. diff --git a/tests/functional/test_task_inception.py b/tests/functional/test_task_inception.py index 650e38a..579f196 100644 --- a/tests/functional/test_task_inception.py +++ b/tests/functional/test_task_inception.py @@ -17,7 +17,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT """ Test ability to call pipelines in pipelines diff --git a/tests/functional/test_task_yield.py b/tests/functional/test_task_yield.py index d557038..b10f54f 100644 --- a/tests/functional/test_task_yield.py +++ b/tests/functional/test_task_yield.py @@ -17,7 +17,7 @@ from disdatluigi.pipe import PipeTask import disdat.api as api import disdatluigi.api as dlapi -from tests.functional.common import run_test, TEST_CONTEXT +from tests.conftest import TEST_CONTEXT """ Tests for dynamic dependencies using Disdat-Luigi diff --git a/tox.ini b/tox.ini index 91162d6..622efdb 100644 --- a/tox.ini +++ b/tox.ini @@ -4,21 +4,21 @@ # and then run "tox" from this directory. [tox] -envlist = clean,py37,py38 +envlist = clean,py{38,39,310,311} skip_missing_interpreters=true [testenv] deps = pytest pytest-cov - moto==1.3.14 + moto>=5 coverage - pyarrow + fastparquet s3fs<=0.4.2 passenv = HOME setenv = MP_CONTEXT_TYPE = fork commands = - pytest tests/functional --disable-warnings --cov-append --cov=disdat --cov-report html + pytest tests/functional --disable-warnings --cov-append --cov=disdat --cov-report html pytest tests/bundles --disable-warnings --cov-append --cov=disdat --cov-report html [testenv:clean]