Skip to content

Commit

Permalink
Updating disdat, luigi, docker to latest (#2)
Browse files Browse the repository at this point in the history
* updating disdat, luigi, docker to latest
* Updated the dependencies here to move to Disdat 1.1.3,
* Moved Docker and Luigi dependencies forward.
* Updated tests to use Mock >5, as well as use standard pytest conftest.py file.
* changed few logger.warn to logger.warning
* sdist creation now uses '_' for canonical naming
* tests require fastparquet, because pyarrow's s3fs implementation side-steps Moto.

* removing debug code

---------

Co-authored-by: Ken Yocum <ken_yocum@intuit.com>
  • Loading branch information
kyocum and Ken Yocum authored Jul 2, 2024
1 parent bd6a973 commit 6b32cab
Show file tree
Hide file tree
Showing 23 changed files with 104 additions and 79 deletions.
8 changes: 4 additions & 4 deletions build-dist.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ echo "Building Disdat package for local installation or PyPi . . ."
# and then: git tag <version>

# Remove the prior tar ball from the context.template
rm -rf disdatluigi/infrastructure/dockerizer/context.template/disdat-luigi-*.tar.gz
rm -rf dist/disdat-luigi-*.tar.gz
rm -rf disdatluigi/infrastructure/dockerizer/context.template/disdat_luigi-*.tar.gz
rm -rf dist/disdat_luigi-*.tar.gz

# Create a new sdist
python setup.py sdist

# Copy over to the context.template.
cp dist/disdat-luigi-*.tar.gz disdatluigi/infrastructure/dockerizer/context.template/.
cp dist/disdat_luigi-*.tar.gz disdatluigi/infrastructure/dockerizer/context.template/.

# Create a new sdist that will have that tar.gz in the template
python setup.py sdist
Expand All @@ -25,7 +25,7 @@ if false; then
#twine upload --repository-url https://test.pypi.org/legacy/ dist/disdat-*.tar.gz
# Test: pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple disdat
# now do it for real
twine upload dist/disdat-luigi-*.tar.gz
twine upload dist/disdat_luigi-*.tar.gz
fi

echo "Finished"
Expand Down
4 changes: 2 additions & 2 deletions disdatluigi/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ def apply(output_bundle, pipe_params, pipe_cls, input_tags, output_tags, force,
_logger.debug("incremental_pull {}".format(incremental_pull))

if incremental_push:
_logger.warn("incremental_push {}".format(incremental_push))
_logger.warning("incremental_push {}".format(incremental_push))

if incremental_pull:
_logger.warn("incremental_pull {}".format(incremental_pull))
_logger.warning("incremental_pull {}".format(incremental_pull))

pfs = fs.DisdatFS()

Expand Down
2 changes: 1 addition & 1 deletion disdatluigi/entrypoints/sagemaker_ep.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def train():
_logger.error("Disdat SageMaker train entrypoint failed.")
sys.exit(os.EX_IOERR)
elif args.purpose == 'serve':
_logger.warn("Disdat does not yet support SageMaker serve.")
_logger.warning("Disdat does not yet support SageMaker serve.")
sys.exit(os.EX_UNAVAILABLE)
else:
_logger.error("Disdat SageMaker invoked entrypoint with {}, not 'train' or 'serve'".format(args.purpose))
Expand Down
2 changes: 1 addition & 1 deletion disdatluigi/utility/aws_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def ecr_create_fq_respository_name(repository_name, policy_resource_package=None
)
repository_metadata = response['repositories'][0]
elif e.response['Error']['Code'] == 'AccessDeniedException':
_logger.warn("Error [AccessDeniedException] when creating repo {}, trying to continue...".format(repository_name))
_logger.warning("Error [AccessDeniedException] when creating repo {}, trying to continue...".format(repository_name))
else:
raise e
return repository_metadata['repositoryUri']
Expand Down
9 changes: 5 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@
# If <= means higher versions broke something.

install_requires=[
'disdat>=1.0.0',
'luigi>=3.0,<=3.1',
'disdat>=1.1.3,<1.2',
'luigi>=3.0,<3.6',
'boto3>=1.14.49,<2.0',
'docker>=4.1.0,<4.4.0',
'docker>=7.0.0,<7.2.0',
],

# List additional groups of dependencies here (e.g. development
Expand All @@ -108,7 +108,8 @@
'pylint',
'coverage',
'tox',
'moto',
'moto>=5',
'fastparquet',
's3fs<=0.4.2' # 0.5.0 breaks with aiobotocore and missing AWS headers
],
'rel': [
Expand Down
4 changes: 2 additions & 2 deletions tests/bundles/test_arg_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import disdat.api as dsdt_api
from disdatluigi.pipe import PipeTask
import disdatluigi.api as api
from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT

test_luigi_args_data = {'str_arg': 'some string',
'int_arg': 10,
Expand All @@ -39,7 +39,7 @@ def pipe_run(self):
return True


def test_luigi_args(run_test):
def test_luigi_args():
""" Create a task, store args, retrieve from bundle api.
Pass in python objects as the values for Luigi parameters.
Stored as serialized json objects. Bundle presents the parameters
Expand Down
58 changes: 58 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import pytest
import disdat.api as api

TEST_CONTEXT = '___test_context___'

"""
NOTE: When testing we try to use Moto to mock-out the s3 calls. Pyarrow sidesteps boto by using its own S3FileSystem. This is a problem
because that c++ code is no longer accessing s3 URLs through the botocore Python library. So you get errors about INVALID CREDENTIALS.
The primary solution for testing is to simply use fastparquet instead of pyarrow. The other alternative is to use the moto_server or, as the
comment below suggests, use fsspec's s3fs. At this time (7-2024) we simply use fastparquet for testing.
From: https://issues.apache.org/jira/browse/ARROW-16437?page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel&focusedCommentId=17530795#comment-17530795
I don't think you can use the "mock_s3" method of moto directly with our S3 filesystem integration (as Antoine said, we don't use boto).
A possible way to do this is to use fsspec's s3fs, and then pass this filesystem object (and pyarrow will wrap that in a PyFileSystem),
but that adds another layer of indirection which you don't need here (and which can be another source of failures).
Another option is to use the "moto_server" feature of moto (http://docs.getmoto.org/en/latest/docs/getting_started.html#stand-alone-server-mode),
which gives you an endpoint url, that can be used to construct a pyarrow S3FileSystem that interacts with the moto server.
This is basically the approach that we use ourselves (but with MinIO instead of moto), and eg also dask switched from mock_s3 to
moto_server in their tests (see eg https://github.com/dask/dask/blob/4d6a5f08c45be56302f696ca4ef6038a1cd1e734/dask/bytes/tests/test_s3.py#L84)
"""

@pytest.fixture(autouse=True, scope='module')
def aws_credentials():
"""Mocked AWS Credentials for moto."""
_aws_credentials()

def _aws_credentials():
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
os.environ["AWS_SECURITY_TOKEN"] = "testing"
os.environ["AWS_SESSION_TOKEN"] = "testing"

@pytest.fixture(autouse=True, scope='function')
def run_test():
# Remove test context before running test
if TEST_CONTEXT in api.ls_contexts():
api.delete_context(context_name=TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
yield
api.delete_context(context_name=TEST_CONTEXT)
34 changes: 0 additions & 34 deletions tests/functional/common.py

This file was deleted.

3 changes: 1 addition & 2 deletions tests/functional/test_api_exit.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@

from disdatluigi.pipe import PipeTask
from disdatluigi.common import ApplyError
import disdat.api as disdat_api
import disdatluigi.api as api
import luigi

from tests.functional.common import TEST_CONTEXT, run_test
from tests.conftest import TEST_CONTEXT

TEST_NAME = 'test_bundle'

Expand Down
5 changes: 2 additions & 3 deletions tests/functional/test_api_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
import docker
import pytest


from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT
from tests.functional.common_tasks import COMMON_DEFAULT_ARGS
import disdat.api as disdat_api
import disdatluigi.api as api
Expand Down Expand Up @@ -106,7 +105,7 @@ def test_run_local_container(run_test, build_container_setup_only):
assert b_a_f.uuid != b_a_f2.uuid


#@moto.mock_s3
#@moto.mock_aws
def manual_test_run_aws_batch(run_test, build_container_setup_only):
""" Incomplete test. The container code itself needs to have
its S3 access mocked out. Here we are testing manually
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_external_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import disdat.api as api
import disdatluigi.api as dlapi

from tests.functional.common import run_test, TEST_CONTEXT # autouse fixture to setup / tear down context
from tests.conftest import TEST_CONTEXT


def test(run_test):
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_external_dep.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import disdat.api as api
import disdatluigi.api as dlapi
from disdatluigi.common import ApplyError
from tests.functional.common import run_test, TEST_CONTEXT # autouse fixture to setup / tear down context
from tests.conftest import TEST_CONTEXT

EXT_BUNDLE_NAME='ext_bundle_human_name'
BUNDLE_CONTENTS=list(range(9))
Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_force_one_and_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT

TEST_NAME = 'test_bundle'

Expand Down
6 changes: 3 additions & 3 deletions tests/functional/test_inc_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import TEST_CONTEXT
from tests.conftest import TEST_CONTEXT


TEST_REMOTE = '__test_remote_context__'
Expand Down Expand Up @@ -82,11 +82,11 @@ def pipe_run(self, b=None):
return {'file': [target.path]}


@moto.mock_s3
@moto.mock_aws
def test_add_with_treat_as_bundle():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)

# Setup moto s3 resources
s3_client = boto3.client('s3')
s3_resource = boto3.resource('s3')
Expand Down
4 changes: 2 additions & 2 deletions tests/functional/test_inc_push.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import TEST_CONTEXT
from tests.conftest import TEST_CONTEXT

TEST_REMOTE = '__test_remote_context__'
TEST_BUCKET = 'test-bucket'
Expand Down Expand Up @@ -79,7 +79,7 @@ def pipe_run(self, b=None):
raise Exception


@moto.mock_s3
@moto.mock_aws
def test_add_with_treat_as_bundle():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand Down
18 changes: 10 additions & 8 deletions tests/functional/test_managed.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@
"""
import boto3
import moto
from moto import mock_aws
import pandas as pd
import pytest
import os

from tests.conftest import _aws_credentials

from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT

TEST_REMOTE = '__test_remote_context__'
TEST_BUCKET = 'test-bucket'
Expand Down Expand Up @@ -132,7 +135,7 @@ def test_non_managed_local():
'Local file should be present in bundle'


@moto.mock_s3
@moto.mock_aws
def test_remote_push_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand Down Expand Up @@ -165,7 +168,7 @@ def test_remote_push_managed_s3():
assert output_file in keys, 'Pipeline should have pushed file'


@moto.mock_s3
@moto.mock_aws
def test_remote_push_non_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand Down Expand Up @@ -205,7 +208,7 @@ def test_remote_push_non_managed_s3():
assert output_file in keys, 'Pipeline should have pushed file'


@moto.mock_s3
@moto.mock_aws
def test_remote_no_push_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand All @@ -226,7 +229,7 @@ def test_remote_no_push_managed_s3():
dlapi.apply(TEST_CONTEXT, ManagedS3)


@moto.mock_s3
@moto.mock_aws
def test_remote_no_push_non_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand Down Expand Up @@ -266,7 +269,7 @@ def test_no_remote_push_managed_s3():
dlapi.apply(TEST_CONTEXT, ManagedS3, incremental_push=True)


@moto.mock_s3
@moto.mock_aws
def test_no_remote_push_non_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand Down Expand Up @@ -301,7 +304,7 @@ def test_no_remote_no_push_managed_s3():
dlapi.apply(TEST_CONTEXT, ManagedS3)


@moto.mock_s3
@moto.mock_aws
def test_no_remote_no_push_non_managed_s3():
api.delete_context(TEST_CONTEXT)
api.context(context_name=TEST_CONTEXT)
Expand All @@ -328,7 +331,6 @@ def test_no_remote_no_push_non_managed_s3():


if __name__ == '__main__':
#test_remote_push_managed_s3()
pytest.main([__file__])


Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_mark_force.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT

TEST_NAME = 'test_bundle'

Expand Down
2 changes: 1 addition & 1 deletion tests/functional/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from disdatluigi.pipe import PipeTask
import disdat.api as api
import disdatluigi.api as dlapi
from tests.functional.common import run_test, TEST_CONTEXT
from tests.conftest import TEST_CONTEXT


class A(PipeTask):
Expand Down
Loading

0 comments on commit 6b32cab

Please sign in to comment.