From ef1f2b8d7ab8204f2d1cf27d250c1ed57ac60a44 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Tue, 28 Mar 2017 09:30:18 -0700 Subject: [PATCH 1/3] Defer S3 Client instantiation until needed Previously the S3 Client was instantiated in the `__init__` method of S3Target, which meant that if you had to, say, traverse a large task graph, quickly creating and throwing away targets, you'd end up incurring a lot of wasted connections and slowness. Now we create the client only when we want to make a remote request. This does not change any of the semantics of how the target is instantiated (still can override `s3` property and the `Key` property), except that making an STS connection is also deferred until the moment it's actually used. --- luigi/contrib/s3.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/luigi/contrib/s3.py b/luigi/contrib/s3.py index d01995fb79..06515912d5 100644 --- a/luigi/contrib/s3.py +++ b/luigi/contrib/s3.py @@ -75,13 +75,31 @@ class S3Client(FileSystem): def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, **kwargs): + from boto.s3.key import Key + options = self._get_s3_config() + options.update(kwargs) + if aws_access_key_id: + options['aws_access_key_id'] = aws_access_key_id + if aws_secret_access_key: + options['aws_secret_access_key'] = aws_secret_access_key + + self.Key = Key + self._options = options + + @property + def s3(self): # only import boto when needed to allow top-lvl s3 module import import boto import boto.s3.connection - from boto.s3.key import Key - options = self._get_s3_config() - options.update(kwargs) + options = dict(self._options) + + if getattr(self, '_s3', None): + return self._s3 + + aws_access_key_id = options.get('aws_access_key_id') + aws_secret_access_key = options.get('aws_secret_access_key') + # Removing key args would break backwards compability role_arn = options.get('aws_role_arn') role_session_name = options.get('aws_role_session_name') @@ -97,22 +115,14 @@ def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, aws_access_key_id = assumed_role.credentials.access_key aws_session_token = assumed_role.credentials.session_token - else: - if not aws_access_key_id: - aws_access_key_id = options.get('aws_access_key_id') - - if not aws_secret_access_key: - aws_secret_access_key = options.get('aws_secret_access_key') - for key in ['aws_access_key_id', 'aws_secret_access_key', 'aws_role_session_name', 'aws_role_arn']: if key in options: options.pop(key) - - self.s3 = boto.s3.connection.S3Connection(aws_access_key_id, - aws_secret_access_key, - security_token=aws_session_token, - **options) - self.Key = Key + self._s3 = boto.s3.connection.S3Connection(aws_access_key_id, + aws_secret_access_key, + security_token=aws_session_token, + **options) + return self._s3 def exists(self, path): """ From 92d527ea70c9581c509ce83a26a48fbcef1e9ef5 Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Tue, 28 Mar 2017 09:58:32 -0700 Subject: [PATCH 2/3] Add S3 setter and deleter methods --- luigi/contrib/s3.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/luigi/contrib/s3.py b/luigi/contrib/s3.py index 06515912d5..2e255c3cab 100644 --- a/luigi/contrib/s3.py +++ b/luigi/contrib/s3.py @@ -124,6 +124,15 @@ def s3(self): **options) return self._s3 + @s3.setter + def s3(self, value): + self._s3 = value + + @s3.deleter + def s3(self): + if hasattr(self, '_s3'): + del self._s3 + def exists(self, path): """ Does provided path exist on S3? From e2694cc134ebc2b36a64102cf086be044443ff4d Mon Sep 17 00:00:00 2001 From: Jeff Tratner Date: Tue, 28 Mar 2017 10:47:56 -0700 Subject: [PATCH 3/3] Ensure _s3 is always defined for more pythonic checks --- luigi/contrib/s3.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/luigi/contrib/s3.py b/luigi/contrib/s3.py index 2e255c3cab..4d77766756 100644 --- a/luigi/contrib/s3.py +++ b/luigi/contrib/s3.py @@ -73,6 +73,8 @@ class S3Client(FileSystem): boto-powered S3 client. """ + _s3 = None + def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, **kwargs): from boto.s3.key import Key @@ -94,7 +96,7 @@ def s3(self): options = dict(self._options) - if getattr(self, '_s3', None): + if self._s3: return self._s3 aws_access_key_id = options.get('aws_access_key_id') @@ -128,11 +130,6 @@ def s3(self): def s3(self, value): self._s3 = value - @s3.deleter - def s3(self): - if hasattr(self, '_s3'): - del self._s3 - def exists(self, path): """ Does provided path exist on S3?