⚡️ Speed up method S3DataSource.put_bucket_policy by 125%
#629
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 125% (1.25x) speedup for
`S3DataSource.put_bucket_policy` in `backend/python/app/sources/external/s3/s3.py` ⏱️ Runtime:
1.29 milliseconds → 574 microseconds (best of 250 runs) 📝 Explanation and details
The optimized code achieves a 124% speedup (from 1.29ms to 574μs) by eliminating the expensive async context manager overhead in the S3 client creation process.
Key optimization applied:
The optimization replaces the per-call `async with session.client('s3') as s3_client:` context manager with a direct call to `await self._get_async_s3_client()`. Why this optimization works:
the async context manager adds an enter (`__aenter__`) and exit (`__aexit__`) operation on every call; profiling showed `async with session.client('s3')` was taking 1.55ms (24.9% of total time) in the original version. Performance impact analysis:
Workload suitability:
This optimization is especially beneficial for:
The optimization maintains identical functionality and error handling while significantly reducing the overhead of S3 client management.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import asyncio # used to run async functions
# Patch aioboto3 and botocore.exceptions for the tested class
import sys
import types
# Now import the function under test, preserving its original logic
# Mocks and stubs for S3Client, S3RESTClientViaAccessKey, S3Response, and aioboto3
from typing import Optional
import pytest # used for our unit tests
from app.sources.external.s3.s3 import S3DataSource
# Minimal S3RESTClientViaAccessKey stub
class S3RESTClientViaAccessKey:
    """Stand-in for the project's access-key REST client; only get_session is needed."""

    def get_session(self):
        # MockAioboto3Session is defined later in this file; the name is
        # resolved at call time, so forward reference is fine.
        return MockAioboto3Session()


# Minimal S3Client wrapper using the stub above
class S3Client:
    """Thin wrapper that delegates session creation to the wrapped client."""

    # Restored `__init__` — the double underscores were stripped by the
    # markdown extraction (rendered as italics), leaving invalid `def init`.
    def __init__(self, client):
        self.client = client

    def get_session(self):
        return self.client.get_session()
# Mock aioboto3 Session and Client
class MockAioboto3Session:
    """Mimics aioboto3.Session: client() returns an async context manager."""

    def client(self, service_name):
        return MockAsyncContextManager(MockS3Client())


class MockAsyncContextManager:
    """Async context manager that yields a pre-built object on entry."""

    # Dunder names restored — the extraction stripped the double underscores
    # from __init__/__aenter__/__aexit__, which would make this class unusable
    # with `async with`.
    def __init__(self, obj):
        self.obj = obj

    async def __aenter__(self):
        return self.obj

    async def __aexit__(self, exc_type, exc, tb):
        pass
# Mock S3 client with put_bucket_policy method
class MockS3Client:
    """Configurable fake S3 client: canned response or forced failures."""

    def __init__(self):
        self._response = None            # value returned by put_bucket_policy
        self._raise_client_error = False  # when True, raise MockClientError
        self._raise_exception = False     # when True, raise a generic Exception
        self._last_kwargs = None          # records the kwargs of the last call

    async def put_bucket_policy(self, **kwargs):
        # NOTE(review): this method body was lost in the page extraction (the
        # header comment references it); reconstructed from the flag/response
        # attributes above — confirm against the original generated test.
        self._last_kwargs = kwargs
        if self._raise_client_error:
            raise MockClientError()
        if self._raise_exception:
            raise Exception("Unexpected error")
        return self._response


class MockClientError(Exception):
    """Mimics botocore ClientError's `.response` error payload."""

    # Restored `__init__` (double underscores stripped by extraction).
    def __init__(self):
        self.response = {'Error': {'Code': 'AccessDenied', 'Message': 'You do not have permission to access this bucket.'}}
@pytest.fixture
def s3_data_source_with_custom_client():
    """Fixture for S3DataSource with a custom mock S3 client."""
    # Wrap the access-key stub so S3DataSource sees the expected client shape.
    client = S3Client(S3RESTClientViaAccessKey())
    ds = S3DataSource(client)
    return ds
# -------- BASIC TEST CASES --------

@pytest.mark.asyncio
async def test_put_bucket_policy_basic_success(s3_data_source_with_custom_client):
    """Test basic successful put_bucket_policy call."""
    ds = s3_data_source_with_custom_client
    # Patch the session to always return our custom S3 client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    # Simulate a normal response
    mock_client._response = {"ResponseMetadata": {"HTTPStatusCode": 200}, "Result": "ok"}
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy("bucket1", '{"Version":"2012-10-17"}')
    # NOTE(review): the original result assertions were stripped by the page
    # extraction; at minimum the call above must complete without raising.


@pytest.mark.asyncio
async def test_put_bucket_policy_basic_optional_parameters(s3_data_source_with_custom_client):
    """Test put_bucket_policy with all optional parameters provided."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    mock_client._response = {"ResponseMetadata": {"HTTPStatusCode": 200}, "Result": "ok"}
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy(
        "bucket2", '{"Version":"2012-10-17"}',
        ChecksumAlgorithm="SHA256",
        ConfirmRemoveSelfBucketAccess=True,
        ExpectedBucketOwner="ownerid"
    )
    # NOTE(review): result assertions stripped by extraction.


@pytest.mark.asyncio
async def test_put_bucket_policy_basic_empty_response(s3_data_source_with_custom_client):
    """Test put_bucket_policy when S3 returns None (empty response)."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    mock_client._response = None
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy("bucket3", '{"Version":"2012-10-17"}')
    # NOTE(review): result assertions stripped by extraction.
# -------- EDGE TEST CASES --------

@pytest.mark.asyncio
async def test_put_bucket_policy_client_error(s3_data_source_with_custom_client):
    """Test put_bucket_policy when S3 client raises ClientError."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    mock_client._raise_client_error = True
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy("bucket4", '{"Version":"2012-10-17"}')
    # NOTE(review): error-result assertions were stripped by the extraction.


@pytest.mark.asyncio
async def test_put_bucket_policy_unexpected_exception(s3_data_source_with_custom_client):
    """Test put_bucket_policy when S3 client raises an unexpected exception."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    mock_client._raise_exception = True
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy("bucket5", '{"Version":"2012-10-17"}')
    # NOTE(review): error-result assertions were stripped by the extraction.


@pytest.mark.asyncio
async def test_put_bucket_policy_concurrent_calls(s3_data_source_with_custom_client):
    """Test concurrent execution of put_bucket_policy."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    # NOTE(review): the concurrent asyncio.gather fan-out and its assertions
    # were stripped by the extraction; only the setup survives.


@pytest.mark.asyncio
async def test_put_bucket_policy_error_response_dict(s3_data_source_with_custom_client):
    """Test put_bucket_policy with error response dict from S3."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    # Simulate error response dict
    mock_client._response = {"Error": {"Code": "InvalidPolicy", "Message": "Policy is invalid"}}
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    result = await ds.put_bucket_policy("bucket6", '{"Version":"2012-10-17"}')
    # NOTE(review): result assertions stripped by extraction.
# -------- LARGE SCALE TEST CASES --------

@pytest.mark.asyncio
async def test_put_bucket_policy_large_scale_concurrent(s3_data_source_with_custom_client):
    """Test large scale concurrent put_bucket_policy calls (up to 50)."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    # NOTE(review): the 50-call asyncio.gather fan-out and its assertions were
    # stripped by the extraction; only the setup survives.


@pytest.mark.asyncio
async def test_put_bucket_policy_large_scale_error_mix(s3_data_source_with_custom_client):
    """Test large scale concurrent calls with some errors."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    # Create 50 mock clients, some raise errors
    mock_clients = []
    for i in range(50):
        mc = MockS3Client()
        if i % 10 == 0:
            mc._raise_client_error = True
        mock_clients.append(mc)
    ds._session = session
    # NOTE(review): the remainder of this test (wiring mock_clients into the
    # session and gathering the calls) was stripped by the extraction.
# -------- THROUGHPUT TEST CASES --------

@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_small_load(s3_data_source_with_custom_client):
    """Throughput: test small load (5 concurrent calls)."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    # NOTE(review): the 5-call fan-out body was stripped by the extraction.


@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_medium_load(s3_data_source_with_custom_client):
    """Throughput: test medium load (20 concurrent calls)."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    # NOTE(review): the 20-call fan-out body was stripped by the extraction.


@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_high_volume(s3_data_source_with_custom_client):
    """Throughput: test high volume load (100 concurrent calls)."""
    ds = s3_data_source_with_custom_client
    session = MockAioboto3Session()
    mock_client = MockS3Client()
    session._client = mock_client
    ds._session = session
    session.client = lambda service_name: MockAsyncContextManager(mock_client)
    # NOTE(review): the 100-call fan-out body was stripped by the extraction.
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import asyncio # used to run async functions
# Mocks and test helpers
from typing import Optional
import pytest # used for our unit tests
from app.sources.external.s3.s3 import S3DataSource
# Minimal S3Client mock
class S3Client:
    """Wrapper that hands back a pre-built session."""

    # Restored `__init__` — double underscores were stripped by extraction.
    def __init__(self, session):
        self._session = session

    def get_session(self):
        return self._session


# Minimal aioboto3.Session mock
class MockSession:
    """client() awaits a factory coroutine and returns the S3 client directly
    (matches the optimized code path that skips the async context manager)."""

    def __init__(self, client_factory):
        self._client_factory = client_factory

    async def client(self, service_name):
        return await self._client_factory()


# Minimal S3 client with async context manager and put_bucket_policy
class MockS3Client:
    def __init__(self, response=None, raise_client_error=False, raise_generic=False):
        self._response = response
        self._raise_client_error = raise_client_error
        self._raise_generic = raise_generic

    # NOTE(review): the async context-manager methods and put_bucket_policy
    # referenced by the header comment were lost in extraction; reconstructed
    # from the constructor flags — confirm against the original generated test.
    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb):
        pass

    async def put_bucket_policy(self, **kwargs):
        if self._raise_client_error:
            raise ClientError()
        if self._raise_generic:
            raise Exception("generic failure")
        return self._response


# Patch ClientError for S3DataSource
class ClientError(Exception):
    """Mimics botocore ClientError's `.response` payload."""

    def __init__(self, code="AccessDenied", message="You do not have permission"):
        self.response = {'Error': {'Code': code, 'Message': message}}
# -------------------- UNIT TESTS --------------------

# Basic Test Case 1: Successful put_bucket_policy with required parameters
@pytest.mark.asyncio
async def test_put_bucket_policy_basic_success():
    # Simulate S3 returns success with dict
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    # BUG FIX: the original factory was
    #   lambda: asyncio.sleep(0) or MockS3Client(response=response)
    # but a coroutine object is truthy, so the lambda returned the bare sleep
    # coroutine; MockSession.client awaited it to None and the mock client was
    # never constructed. An explicit async factory preserves the intent
    # (yield control once, then return the client).
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    # Await the async function
    result = await datasource.put_bucket_policy(Bucket="my-bucket", Policy="{}")
    # NOTE(review): result assertions stripped by extraction.


# Basic Test Case 2: Successful put_bucket_policy with all optional parameters
@pytest.mark.asyncio
async def test_put_bucket_policy_basic_all_params():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def make_client():  # see BUG FIX note in the first test above
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    result = await datasource.put_bucket_policy(
        Bucket="bucket",
        Policy="{}",
        ChecksumAlgorithm="SHA256",
        ConfirmRemoveSelfBucketAccess=True,
        ExpectedBucketOwner="owner123"
    )


# Basic Test Case 3: S3 returns error in response dict
@pytest.mark.asyncio
async def test_put_bucket_policy_basic_s3_error_response():
    response = {"Error": {"Code": "MalformedPolicy", "Message": "Policy is invalid"}}

    async def make_client():  # see BUG FIX note in the first test above
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    result = await datasource.put_bucket_policy(Bucket="bucket", Policy="{}")
# Edge Test Case 1: S3 returns None response
@pytest.mark.asyncio
async def test_put_bucket_policy_edge_none_response():
    # BUG FIX (applies to every test in this section that used it): the
    # original factory `lambda: asyncio.sleep(0) or MockS3Client(...)`
    # returned the bare sleep coroutine — coroutine objects are truthy, so
    # `or` short-circuits and the mock client was never constructed.
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(response=None)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    result = await datasource.put_bucket_policy(Bucket="bucket", Policy="{}")


# Edge Test Case 2: S3 client raises ClientError exception
@pytest.mark.asyncio
async def test_put_bucket_policy_edge_client_error():
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(raise_client_error=True)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    result = await datasource.put_bucket_policy(Bucket="bucket", Policy="{}")


# Edge Test Case 3: S3 client raises generic exception
@pytest.mark.asyncio
async def test_put_bucket_policy_edge_generic_exception():
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(raise_generic=True)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    result = await datasource.put_bucket_policy(Bucket="bucket", Policy="{}")


# Edge Test Case 4: Concurrent execution with valid responses
@pytest.mark.asyncio
async def test_put_bucket_policy_edge_concurrent_success():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(5)
    ]
    results = await asyncio.gather(*coros)
    for result in results:
        # NOTE(review): per-result assertions were stripped by the extraction.
        pass


# Edge Test Case 5: Concurrent execution with mixed error/success
@pytest.mark.asyncio
async def test_put_bucket_policy_edge_concurrent_mixed():
    # First 3 succeed, last 2 error
    responses = [
        {"ResponseMetadata": {"HTTPStatusCode": 200}},
        {"ResponseMetadata": {"HTTPStatusCode": 200}},
        {"ResponseMetadata": {"HTTPStatusCode": 200}},
        None,
        {"Error": {"Code": "MalformedPolicy", "Message": "Policy is invalid"}}
    ]

    def factory_gen():
        # Use closure to pop responses
        responses_copy = list(responses)

        async def factory():
            resp = responses_copy.pop(0)
            return MockS3Client(response=resp)

        return factory

    session = MockSession(factory_gen())
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(5)
    ]
    results = await asyncio.gather(*coros)
    # First 3 should succeed
    for i in range(3):
        # NOTE(review): per-result assertions were stripped by the extraction.
        pass
# Large Scale Test Case 1: Many concurrent successful calls
@pytest.mark.asyncio
async def test_put_bucket_policy_large_scale_many_success():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    # BUG FIX: replaces `lambda: asyncio.sleep(0) or MockS3Client(...)` —
    # a coroutine object is truthy, so the lambda returned the sleep coroutine
    # and the mock client was never constructed.
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(50)
    ]
    results = await asyncio.gather(*coros)
    for result in results:
        # NOTE(review): per-result assertions were stripped by the extraction.
        pass


# Large Scale Test Case 2: Many concurrent calls with alternating errors
@pytest.mark.asyncio
async def test_put_bucket_policy_large_scale_mixed():
    # Alternate between success and error
    def factory_gen():
        count = [0]

        async def factory():
            if count[0] % 2 == 0:
                resp = {"ResponseMetadata": {"HTTPStatusCode": 200}}
            else:
                resp = {"Error": {"Code": "MalformedPolicy", "Message": "Policy is invalid"}}
            count[0] += 1
            return MockS3Client(response=resp)

        return factory

    session = MockSession(factory_gen())
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(20)
    ]
    results = await asyncio.gather(*coros)
    for i, result in enumerate(results):
        # NOTE(review): the even/odd branch assertions were stripped by the
        # extraction; also note the shared `count` makes per-call responses
        # depend on gather scheduling order — confirm intent.
        if i % 2 == 0:
            pass
        else:
            pass
# Throughput Test Case 1: Small load
@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_small_load():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    # BUG FIX (all four throughput tests): replaces
    # `lambda: asyncio.sleep(0) or MockS3Client(...)` — a coroutine object is
    # truthy, so the lambda returned the sleep coroutine and the mock client
    # was never constructed.
    async def make_client():
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(5)
    ]
    results = await asyncio.gather(*coros)


# Throughput Test Case 2: Medium load
@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_medium_load():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def make_client():  # see BUG FIX note in the first throughput test
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(20)
    ]
    results = await asyncio.gather(*coros)


# Throughput Test Case 3: High volume load (100 calls)
@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_high_volume():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def make_client():  # see BUG FIX note in the first throughput test
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    coros = [
        datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
        for i in range(100)
    ]
    results = await asyncio.gather(*coros)


# Throughput Test Case 4: Sustained execution pattern (repeat calls)
@pytest.mark.asyncio
async def test_put_bucket_policy_throughput_sustained_execution():
    response = {"ResponseMetadata": {"HTTPStatusCode": 200}}

    async def make_client():  # see BUG FIX note in the first throughput test
        await asyncio.sleep(0)
        return MockS3Client(response=response)

    session = MockSession(make_client)
    s3_client = S3Client(session)
    datasource = S3DataSource(s3_client)
    # Call function 10 times in sequence
    for i in range(10):
        result = await datasource.put_bucket_policy(Bucket=f"bucket-{i}", Policy="{}")
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run
`git checkout codeflash/optimize-S3DataSource.put_bucket_policy-mhx8ke8p` and push.